diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..df9efad --- /dev/null +++ b/.gitignore @@ -0,0 +1,116 @@ +# Initially taken from Github's Python gitignore file + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..cf30ded --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,5 @@ + +# Default ignored files +/workspace.xml +# Datasource local storage ignored files +/dataSources.local.xml \ No newline at end of file diff --git a/.idea/bert.iml b/.idea/bert.iml new file mode 100644 index 0000000..6a3f7ec --- /dev/null +++ b/.idea/bert.iml @@ -0,0 +1,11 @@ + + + + + + + + + + \ No newline at end of file diff --git a/.idea/dataSources.xml b/.idea/dataSources.xml new file mode 100644 index 0000000..eec2c94 --- /dev/null +++ b/.idea/dataSources.xml @@ -0,0 +1,11 @@ + + + + + sqlite.xerial + true + org.sqlite.JDBC + jdbc:sqlite:C:\Users\Administrator\Documents\GitHub\bert\bptdata.db + + + \ No newline at end of file diff --git a/.idea/dictionaries/Administrator.xml b/.idea/dictionaries/Administrator.xml new file mode 100644 index 0000000..b033f19 --- /dev/null +++ b/.idea/dictionaries/Administrator.xml @@ -0,0 +1,22 @@ + + + + amki + asctime + badrequest + bptdata + codedream + epaper + epout + eppdt + eppdtout + eppredict + idcode + levelname + nlpdata + sckstn + stnid + stns + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..fb94267 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,13 @@ + + + + + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..e84c31f --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/other.xml b/.idea/other.xml new file mode 100644 index 0000000..640fd80 --- /dev/null 
+++ b/.idea/other.xml @@ -0,0 +1,7 @@ + + + + + \ No newline at end of file diff --git a/.idea/sqldialects.xml b/.idea/sqldialects.xml new file mode 100644 index 0000000..5b66d9a --- /dev/null +++ b/.idea/sqldialects.xml @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..effb57b --- /dev/null +++ b/__init__.py @@ -0,0 +1,15 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/bptdata.db b/bptdata.db new file mode 100644 index 0000000..7e4b91d Binary files /dev/null and b/bptdata.db differ diff --git a/chinese_wwm_ext_L-12_H-768_A-12/bert_config.json b/chinese_wwm_ext_L-12_H-768_A-12/bert_config.json new file mode 100644 index 0000000..adb75ff --- /dev/null +++ b/chinese_wwm_ext_L-12_H-768_A-12/bert_config.json @@ -0,0 +1,19 @@ +{ + "attention_probs_dropout_prob": 0.1, + "directionality": "bidi", + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "max_position_embeddings": 512, + "num_attention_heads": 12, + "num_hidden_layers": 12, + "pooler_fc_size": 768, + "pooler_num_attention_heads": 12, + "pooler_num_fc_layers": 3, + "pooler_size_per_head": 128, + "pooler_type": "first_token_transform", + "type_vocab_size": 2, + "vocab_size": 21128 +} diff --git a/chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt.index b/chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt.index new file mode 100644 index 0000000..8c80a01 Binary files /dev/null and b/chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt.index differ diff --git a/chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt.meta b/chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt.meta new file mode 100644 index 0000000..7b374b5 Binary files /dev/null and b/chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt.meta differ diff --git a/chinese_wwm_ext_L-12_H-768_A-12/vocab.txt b/chinese_wwm_ext_L-12_H-768_A-12/vocab.txt new file mode 100644 index 0000000..ca4f978 --- /dev/null +++ b/chinese_wwm_ext_L-12_H-768_A-12/vocab.txt @@ -0,0 +1,21128 @@ +[PAD] +[unused1] +[unused2] +[unused3] +[unused4] +[unused5] +[unused6] +[unused7] +[unused8] +[unused9] +[unused10] +[unused11] +[unused12] +[unused13] +[unused14] +[unused15] +[unused16] +[unused17] +[unused18] +[unused19] +[unused20] +[unused21] +[unused22] +[unused23] +[unused24] +[unused25] +[unused26] +[unused27] +[unused28] +[unused29] +[unused30] +[unused31] +[unused32] +[unused33] +[unused34] +[unused35] +[unused36] +[unused37] +[unused38] +[unused39] +[unused40] +[unused41] +[unused42] +[unused43] +[unused44] +[unused45] +[unused46] +[unused47] +[unused48] +[unused49] +[unused50] +[unused51] +[unused52] +[unused53] +[unused54] +[unused55] +[unused56] +[unused57] +[unused58] +[unused59] +[unused60] 
+[unused61] +[unused62] +[unused63] +[unused64] +[unused65] +[unused66] +[unused67] +[unused68] +[unused69] +[unused70] +[unused71] +[unused72] +[unused73] +[unused74] +[unused75] +[unused76] +[unused77] +[unused78] +[unused79] +[unused80] +[unused81] +[unused82] +[unused83] +[unused84] +[unused85] +[unused86] +[unused87] +[unused88] +[unused89] +[unused90] +[unused91] +[unused92] +[unused93] +[unused94] +[unused95] +[unused96] +[unused97] +[unused98] +[unused99] +[UNK] +[CLS] +[SEP] +[MASK] + + +! +" +# +$ +% +& +' +( +) +* ++ +, +- +. +/ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +: +; +< += +> +? +@ +[ +\ +] +^ +_ +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +q +r +s +t +u +v +w +x +y +z +{ +| +} +~ +£ +¤ +¥ +§ +© +« +® +° +± +² +³ +µ +· +¹ +º +» +¼ +× +ß +æ +÷ +ø +đ +ŋ +ɔ +ə +ɡ +ʰ +ˇ +ˈ +ˊ +ˋ +ˍ +ː +˙ +˚ +ˢ +α +β +γ +δ +ε +η +θ +ι +κ +λ +μ +ν +ο +π +ρ +ς +σ +τ +υ +φ +χ +ψ +ω +а +б +в +г +д +е +ж +з +и +к +л +м +н +о +п +р +с +т +у +ф +х +ц +ч +ш +ы +ь +я +і +ا +ب +ة +ت +د +ر +س +ع +ل +م +ن +ه +و +ي +۩ +ก +ง +น +ม +ย +ร +อ +า +เ +๑ +་ +ღ +ᄀ +ᄁ +ᄂ +ᄃ +ᄅ +ᄆ +ᄇ +ᄈ +ᄉ +ᄋ +ᄌ +ᄎ +ᄏ +ᄐ +ᄑ +ᄒ +ᅡ +ᅢ +ᅣ +ᅥ +ᅦ +ᅧ +ᅨ +ᅩ +ᅪ +ᅬ +ᅭ +ᅮ +ᅯ +ᅲ +ᅳ +ᅴ +ᅵ +ᆨ +ᆫ +ᆯ +ᆷ +ᆸ +ᆺ +ᆻ +ᆼ +ᗜ +ᵃ +ᵉ +ᵍ +ᵏ +ᵐ +ᵒ +ᵘ +‖ +„ +† +• +‥ +‧ +
 +‰ +′ +″ +‹ +› +※ +‿ +⁄ +ⁱ +⁺ +ⁿ +₁ +₂ +₃ +₄ +€ +℃ +№ +™ +ⅰ +ⅱ +ⅲ +ⅳ +ⅴ +← +↑ +→ +↓ +↔ +↗ +↘ +⇒ +∀ +− +∕ +∙ +√ +∞ +∟ +∠ +∣ +∥ +∩ +∮ +∶ +∼ +∽ +≈ +≒ +≡ +≤ +≥ +≦ +≧ +≪ +≫ +⊙ +⋅ +⋈ +⋯ +⌒ +① +② +③ +④ +⑤ +⑥ +⑦ +⑧ +⑨ +⑩ +⑴ +⑵ +⑶ +⑷ +⑸ +⒈ +⒉ +⒊ +⒋ +ⓒ +ⓔ +ⓘ +─ +━ +│ +┃ +┅ +┆ +┊ +┌ +└ +├ +┣ +═ +║ +╚ +╞ +╠ +╭ +╮ +╯ +╰ +╱ +╳ +▂ +▃ +▅ +▇ +█ +▉ +▋ +▌ +▍ +▎ +■ +□ +▪ +▫ +▬ +▲ +△ +▶ +► +▼ +▽ +◆ +◇ +○ +◎ +● +◕ +◠ +◢ +◤ +☀ +★ +☆ +☕ +☞ +☺ +☼ +♀ +♂ +♠ +♡ +♣ +♥ +♦ +♪ +♫ +♬ +✈ +✔ +✕ +✖ +✦ +✨ +✪ +✰ +✿ +❀ +❤ +➜ +➤ +⦿ +、 +。 +〃 +々 +〇 +〈 +〉 +《 +》 +「 +」 +『 +』 +【 +】 +〓 +〔 +〕 +〖 +〗 +〜 +〝 +〞 +ぁ +あ +ぃ +い +う +ぇ +え +お +か +き +く +け +こ +さ +し +す +せ +そ +た +ち +っ +つ +て +と +な +に +ぬ +ね +の +は +ひ +ふ +へ +ほ +ま +み +む +め +も +ゃ +や +ゅ +ゆ +ょ +よ +ら +り +る +れ +ろ +わ +を +ん +゜ +ゝ +ァ +ア +ィ +イ +ゥ +ウ +ェ +エ +ォ +オ +カ +キ +ク +ケ +コ +サ +シ +ス +セ +ソ +タ +チ +ッ +ツ +テ +ト +ナ +ニ +ヌ +ネ +ノ +ハ +ヒ +フ +ヘ +ホ +マ +ミ +ム +メ +モ +ャ +ヤ +ュ +ユ +ョ +ヨ +ラ +リ +ル +レ +ロ +ワ +ヲ +ン +ヶ +・ +ー +ヽ +ㄅ +ㄆ +ㄇ +ㄉ +ㄋ +ㄌ +ㄍ +ㄎ +ㄏ +ㄒ +ㄚ +ㄛ +ㄞ +ㄟ +ㄢ +ㄤ +ㄥ +ㄧ +ㄨ +ㆍ +㈦ +㊣ +㎡ +㗎 +一 +丁 +七 +万 +丈 +三 +上 +下 +不 +与 +丐 +丑 +专 +且 +丕 +世 +丘 +丙 +业 +丛 +东 +丝 +丞 +丟 +両 +丢 +两 +严 +並 +丧 +丨 +个 +丫 +中 +丰 +串 +临 +丶 +丸 +丹 +为 +主 +丼 +丽 +举 +丿 +乂 +乃 +久 +么 +义 +之 +乌 +乍 +乎 +乏 +乐 +乒 +乓 +乔 +乖 +乗 +乘 +乙 +乜 +九 +乞 +也 +习 +乡 +书 +乩 +买 +乱 +乳 +乾 +亀 +亂 +了 +予 +争 +事 +二 +于 +亏 +云 +互 +五 +井 +亘 +亙 +亚 +些 +亜 +亞 +亟 +亡 +亢 +交 +亥 +亦 +产 +亨 +亩 +享 +京 +亭 +亮 +亲 +亳 +亵 +人 +亿 +什 +仁 +仃 +仄 +仅 +仆 +仇 +今 +介 +仍 +从 +仏 +仑 +仓 +仔 +仕 +他 +仗 +付 +仙 +仝 +仞 +仟 +代 +令 +以 +仨 +仪 +们 +仮 +仰 +仲 +件 +价 +任 +份 +仿 +企 +伉 +伊 +伍 +伎 +伏 +伐 +休 +伕 +众 +优 +伙 +会 +伝 +伞 +伟 +传 +伢 +伤 +伦 +伪 +伫 +伯 +估 +伴 +伶 +伸 +伺 +似 +伽 +佃 +但 +佇 +佈 +位 +低 +住 +佐 +佑 +体 +佔 +何 +佗 +佘 +余 +佚 +佛 +作 +佝 +佞 +佟 +你 +佢 +佣 +佤 +佥 +佩 +佬 +佯 +佰 +佳 +併 +佶 +佻 +佼 +使 +侃 +侄 +來 +侈 +例 +侍 +侏 +侑 +侖 +侗 +供 +依 +侠 +価 +侣 +侥 +侦 +侧 +侨 +侬 +侮 +侯 +侵 +侶 +侷 +便 +係 +促 +俄 +俊 +俎 +俏 +俐 +俑 +俗 +俘 +俚 +保 +俞 +俟 +俠 +信 +俨 +俩 +俪 +俬 +俭 +修 +俯 +俱 +俳 +俸 +俺 +俾 +倆 +倉 +個 +倌 +倍 +倏 +們 +倒 +倔 +倖 +倘 +候 +倚 +倜 +借 +倡 +値 +倦 +倩 +倪 +倫 +倬 +倭 +倶 +债 +值 +倾 +偃 +假 +偈 +偉 +偌 +偎 +偏 +偕 +做 +停 +健 +側 +偵 +偶 +偷 +偻 +偽 +偿 +傀 +傅 +傍 +傑 +傘 +備 +傚 +傢 +傣 +傥 +储 +傩 +催 +傭 +傲 +傳 +債 +傷 +傻 +傾 +僅 +働 +像 +僑 +僕 +僖 +僚 +僥 +僧 +僭 +僮 +僱 +僵 +價 +僻 +儀 +儂 +億 +儆 +儉 +儋 +儒 +儕 +儘 +償 +儡 +優 +儲 +儷 +儼 +儿 +兀 +允 +元 +兄 +充 +兆 +兇 +先 +光 +克 +兌 +免 +児 +兑 +兒 +兔 +兖 +党 +兜 +兢 +入 +內 +全 +兩 +八 +公 +六 +兮 +兰 +共 +兲 +关 +兴 +兵 +其 +具 +典 +兹 +养 +兼 +兽 +冀 +内 +円 +冇 +冈 +冉 +冊 +册 +再 +冏 +冒 +冕 +冗 +写 +军 +农 +冠 +冢 +冤 +冥 +冨 +冪 +冬 +冯 +冰 +冲 +决 +况 +冶 +冷 +冻 +冼 +冽 +冾 +净 +凄 +准 +凇 +凈 +凉 +凋 +凌 +凍 +减 +凑 +凛 +凜 +凝 +几 +凡 +凤 +処 +凪 +凭 +凯 +凰 +凱 +凳 +凶 +凸 +凹 +出 +击 +函 +凿 +刀 +刁 +刃 +分 +切 +刈 +刊 +刍 +刎 +刑 +划 +列 +刘 +则 +刚 +创 +初 +删 +判 +別 +刨 +利 +刪 +别 +刮 +到 +制 +刷 +券 +刹 +刺 +刻 +刽 +剁 +剂 +剃 +則 +剉 +削 +剋 +剌 +前 +剎 +剐 +剑 +剔 +剖 +剛 +剜 +剝 +剣 +剤 +剥 +剧 +剩 +剪 +副 +割 +創 +剷 +剽 +剿 +劃 +劇 +劈 +劉 +劊 +劍 +劏 +劑 +力 +劝 +办 +功 +加 +务 +劣 +动 +助 +努 +劫 +劭 +励 +劲 +劳 +労 +劵 +効 +劾 +势 +勁 +勃 +勇 +勉 +勋 +勐 +勒 +動 +勖 +勘 +務 +勛 +勝 +勞 +募 +勢 +勤 +勧 +勳 +勵 +勸 +勺 +勻 +勾 +勿 +匀 +包 +匆 +匈 +匍 +匐 +匕 +化 +北 +匙 +匝 +匠 +匡 +匣 +匪 +匮 +匯 +匱 +匹 +区 +医 +匾 +匿 +區 +十 +千 +卅 +升 +午 +卉 +半 +卍 +华 +协 +卑 +卒 +卓 +協 +单 +卖 +南 +単 +博 +卜 +卞 +卟 +占 +卡 +卢 +卤 +卦 +卧 +卫 +卮 +卯 +印 +危 +即 +却 +卵 +卷 +卸 +卻 +卿 +厂 +厄 +厅 +历 +厉 +压 +厌 +厕 +厘 +厚 +厝 +原 +厢 +厥 +厦 +厨 +厩 +厭 +厮 +厲 +厳 +去 +县 +叁 +参 +參 +又 +叉 +及 +友 +双 +反 +収 +发 +叔 +取 +受 +变 +叙 +叛 +叟 +叠 +叡 +叢 +口 +古 +句 +另 +叨 +叩 +只 +叫 +召 +叭 +叮 +可 +台 +叱 +史 +右 +叵 +叶 +号 +司 +叹 +叻 +叼 +叽 +吁 +吃 +各 +吆 +合 +吉 +吊 +吋 +同 +名 +后 +吏 +吐 +向 +吒 +吓 +吕 +吖 +吗 +君 +吝 +吞 +吟 +吠 +吡 +否 +吧 +吨 +吩 +含 +听 +吭 +吮 +启 +吱 +吳 +吴 +吵 +吶 +吸 +吹 +吻 +吼 +吽 +吾 +呀 +呂 +呃 +呆 +呈 +告 +呋 +呎 +呐 +呓 +呕 +呗 +员 +呛 +呜 +呢 +呤 +呦 +周 +呱 +呲 +味 +呵 +呷 +呸 +呻 +呼 +命 +咀 +咁 +咂 +咄 +咆 +咋 +和 +咎 +咏 +咐 +咒 +咔 +咕 +咖 +咗 +咘 +咙 +咚 +咛 +咣 +咤 +咦 +咧 +咨 +咩 +咪 +咫 +咬 +咭 +咯 +咱 +咲 +咳 +咸 +咻 +咽 +咿 +哀 +品 +哂 +哄 +哆 +哇 +哈 +哉 +哋 +哌 +响 +哎 +哏 +哐 +哑 +哒 +哔 +哗 +哟 +員 +哥 +哦 +哧 +哨 +哩 +哪 +哭 +哮 
+哲 +哺 +哼 +哽 +唁 +唄 +唆 +唇 +唉 +唏 +唐 +唑 +唔 +唠 +唤 +唧 +唬 +售 +唯 +唰 +唱 +唳 +唷 +唸 +唾 +啃 +啄 +商 +啉 +啊 +問 +啓 +啕 +啖 +啜 +啞 +啟 +啡 +啤 +啥 +啦 +啧 +啪 +啫 +啬 +啮 +啰 +啱 +啲 +啵 +啶 +啷 +啸 +啻 +啼 +啾 +喀 +喂 +喃 +善 +喆 +喇 +喉 +喊 +喋 +喎 +喏 +喔 +喘 +喙 +喚 +喜 +喝 +喟 +喧 +喪 +喫 +喬 +單 +喰 +喱 +喲 +喳 +喵 +営 +喷 +喹 +喺 +喻 +喽 +嗅 +嗆 +嗇 +嗎 +嗑 +嗒 +嗓 +嗔 +嗖 +嗚 +嗜 +嗝 +嗟 +嗡 +嗣 +嗤 +嗦 +嗨 +嗪 +嗬 +嗯 +嗰 +嗲 +嗳 +嗶 +嗷 +嗽 +嘀 +嘅 +嘆 +嘈 +嘉 +嘌 +嘍 +嘎 +嘔 +嘖 +嘗 +嘘 +嘚 +嘛 +嘜 +嘞 +嘟 +嘢 +嘣 +嘤 +嘧 +嘩 +嘭 +嘮 +嘯 +嘰 +嘱 +嘲 +嘴 +嘶 +嘸 +嘹 +嘻 +嘿 +噁 +噌 +噎 +噓 +噔 +噗 +噙 +噜 +噠 +噢 +噤 +器 +噩 +噪 +噬 +噱 +噴 +噶 +噸 +噹 +噻 +噼 +嚀 +嚇 +嚎 +嚏 +嚐 +嚓 +嚕 +嚟 +嚣 +嚥 +嚨 +嚮 +嚴 +嚷 +嚼 +囂 +囉 +囊 +囍 +囑 +囔 +囗 +囚 +四 +囝 +回 +囟 +因 +囡 +团 +団 +囤 +囧 +囪 +囫 +园 +困 +囱 +囲 +図 +围 +囹 +固 +国 +图 +囿 +圃 +圄 +圆 +圈 +國 +圍 +圏 +園 +圓 +圖 +團 +圜 +土 +圣 +圧 +在 +圩 +圭 +地 +圳 +场 +圻 +圾 +址 +坂 +均 +坊 +坍 +坎 +坏 +坐 +坑 +块 +坚 +坛 +坝 +坞 +坟 +坠 +坡 +坤 +坦 +坨 +坪 +坯 +坳 +坵 +坷 +垂 +垃 +垄 +型 +垒 +垚 +垛 +垠 +垢 +垣 +垦 +垩 +垫 +垭 +垮 +垵 +埂 +埃 +埋 +城 +埔 +埕 +埗 +域 +埠 +埤 +埵 +執 +埸 +培 +基 +埼 +堀 +堂 +堃 +堅 +堆 +堇 +堑 +堕 +堙 +堡 +堤 +堪 +堯 +堰 +報 +場 +堵 +堺 +堿 +塊 +塌 +塑 +塔 +塗 +塘 +塚 +塞 +塢 +塩 +填 +塬 +塭 +塵 +塾 +墀 +境 +墅 +墉 +墊 +墒 +墓 +増 +墘 +墙 +墜 +增 +墟 +墨 +墩 +墮 +墳 +墻 +墾 +壁 +壅 +壆 +壇 +壊 +壑 +壓 +壕 +壘 +壞 +壟 +壢 +壤 +壩 +士 +壬 +壮 +壯 +声 +売 +壳 +壶 +壹 +壺 +壽 +处 +备 +変 +复 +夏 +夔 +夕 +外 +夙 +多 +夜 +够 +夠 +夢 +夥 +大 +天 +太 +夫 +夭 +央 +夯 +失 +头 +夷 +夸 +夹 +夺 +夾 +奂 +奄 +奇 +奈 +奉 +奋 +奎 +奏 +奐 +契 +奔 +奕 +奖 +套 +奘 +奚 +奠 +奢 +奥 +奧 +奪 +奬 +奮 +女 +奴 +奶 +奸 +她 +好 +如 +妃 +妄 +妆 +妇 +妈 +妊 +妍 +妒 +妓 +妖 +妘 +妙 +妝 +妞 +妣 +妤 +妥 +妨 +妩 +妪 +妮 +妲 +妳 +妹 +妻 +妾 +姆 +姉 +姊 +始 +姍 +姐 +姑 +姒 +姓 +委 +姗 +姚 +姜 +姝 +姣 +姥 +姦 +姨 +姪 +姫 +姬 +姹 +姻 +姿 +威 +娃 +娄 +娅 +娆 +娇 +娉 +娑 +娓 +娘 +娛 +娜 +娟 +娠 +娣 +娥 +娩 +娱 +娲 +娴 +娶 +娼 +婀 +婁 +婆 +婉 +婊 +婕 +婚 +婢 +婦 +婧 +婪 +婭 +婴 +婵 +婶 +婷 +婺 +婿 +媒 +媚 +媛 +媞 +媧 +媲 +媳 +媽 +媾 +嫁 +嫂 +嫉 +嫌 +嫑 +嫔 +嫖 +嫘 +嫚 +嫡 +嫣 +嫦 +嫩 +嫲 +嫵 +嫻 +嬅 +嬉 +嬌 +嬗 +嬛 +嬢 +嬤 +嬪 +嬰 +嬴 +嬷 +嬸 +嬿 +孀 +孃 +子 +孑 +孔 +孕 +孖 +字 +存 +孙 +孚 +孛 +孜 +孝 +孟 +孢 +季 +孤 +学 +孩 +孪 +孫 +孬 +孰 +孱 +孳 +孵 +學 +孺 +孽 +孿 +宁 +它 +宅 +宇 +守 +安 +宋 +完 +宏 +宓 +宕 +宗 +官 +宙 +定 +宛 +宜 +宝 +实 +実 +宠 +审 +客 +宣 +室 +宥 +宦 +宪 +宫 +宮 +宰 +害 +宴 +宵 +家 +宸 +容 +宽 +宾 +宿 +寂 +寄 +寅 +密 +寇 +富 +寐 +寒 +寓 +寛 +寝 +寞 +察 +寡 +寢 +寥 +實 +寧 +寨 +審 +寫 +寬 +寮 +寰 +寵 +寶 +寸 +对 +寺 +寻 +导 +対 +寿 +封 +専 +射 +将 +將 +專 +尉 +尊 +尋 +對 +導 +小 +少 +尔 +尕 +尖 +尘 +尚 +尝 +尤 +尧 +尬 +就 +尴 +尷 +尸 +尹 +尺 +尻 +尼 +尽 +尾 +尿 +局 +屁 +层 +屄 +居 +屆 +屈 +屉 +届 +屋 +屌 +屍 +屎 +屏 +屐 +屑 +展 +屜 +属 +屠 +屡 +屢 +層 +履 +屬 +屯 +山 +屹 +屿 +岀 +岁 +岂 +岌 +岐 +岑 +岔 +岖 +岗 +岘 +岙 +岚 +岛 +岡 +岩 +岫 +岬 +岭 +岱 +岳 +岷 +岸 +峇 +峋 +峒 +峙 +峡 +峤 +峥 +峦 +峨 +峪 +峭 +峯 +峰 +峴 +島 +峻 +峽 +崁 +崂 +崆 +崇 +崎 +崑 +崔 +崖 +崗 +崙 +崛 +崧 +崩 +崭 +崴 +崽 +嵇 +嵊 +嵋 +嵌 +嵐 +嵘 +嵩 +嵬 +嵯 +嶂 +嶄 +嶇 +嶋 +嶙 +嶺 +嶼 +嶽 +巅 +巍 +巒 +巔 +巖 +川 +州 +巡 +巢 +工 +左 +巧 +巨 +巩 +巫 +差 +己 +已 +巳 +巴 +巷 +巻 +巽 +巾 +巿 +币 +市 +布 +帅 +帆 +师 +希 +帐 +帑 +帕 +帖 +帘 +帚 +帛 +帜 +帝 +帥 +带 +帧 +師 +席 +帮 +帯 +帰 +帳 +帶 +帷 +常 +帼 +帽 +幀 +幂 +幄 +幅 +幌 +幔 +幕 +幟 +幡 +幢 +幣 +幫 +干 +平 +年 +并 +幸 +幹 +幺 +幻 +幼 +幽 +幾 +广 +庁 +広 +庄 +庆 +庇 +床 +序 +庐 +库 +应 +底 +庖 +店 +庙 +庚 +府 +庞 +废 +庠 +度 +座 +庫 +庭 +庵 +庶 +康 +庸 +庹 +庾 +廁 +廂 +廃 +廈 +廉 +廊 +廓 +廖 +廚 +廝 +廟 +廠 +廢 +廣 +廬 +廳 +延 +廷 +建 +廿 +开 +弁 +异 +弃 +弄 +弈 +弊 +弋 +式 +弑 +弒 +弓 +弔 +引 +弗 +弘 +弛 +弟 +张 +弥 +弦 +弧 +弩 +弭 +弯 +弱 +張 +強 +弹 +强 +弼 +弾 +彅 +彆 +彈 +彌 +彎 +归 +当 +录 +彗 +彙 +彝 +形 +彤 +彥 +彦 +彧 +彩 +彪 +彫 +彬 +彭 +彰 +影 +彷 +役 +彻 +彼 +彿 +往 +征 +径 +待 +徇 +很 +徉 +徊 +律 +後 +徐 +徑 +徒 +従 +徕 +得 +徘 +徙 +徜 +從 +徠 +御 +徨 +復 +循 +徬 +微 +徳 +徴 +徵 +德 +徹 +徼 +徽 +心 +必 +忆 +忌 +忍 +忏 +忐 +忑 +忒 +忖 +志 +忘 +忙 +応 +忠 +忡 +忤 +忧 +忪 +快 +忱 +念 +忻 +忽 +忿 +怀 +态 +怂 +怅 +怆 +怎 +怏 +怒 +怔 +怕 +怖 +怙 +怜 +思 +怠 +怡 +急 +怦 +性 +怨 +怪 +怯 +怵 +总 +怼 +恁 +恃 +恆 +恋 +恍 +恐 +恒 +恕 +恙 +恚 +恢 +恣 +恤 +恥 +恨 +恩 +恪 +恫 +恬 +恭 +息 +恰 +恳 +恵 +恶 +恸 +恺 +恻 +恼 +恿 +悄 +悅 +悉 +悌 +悍 +悔 +悖 +悚 +悟 +悠 +患 +悦 +您 +悩 +悪 +悬 +悯 +悱 +悲 +悴 +悵 +悶 +悸 +悻 +悼 +悽 +情 +惆 +惇 +惊 +惋 +惑 +惕 +惘 +惚 +惜 +惟 +惠 +惡 +惦 +惧 +惨 +惩 +惫 +惬 +惭 +惮 +惯 +惰 +惱 +想 +惴 +惶 +惹 +惺 +愁 +愆 +愈 +愉 +愍 +意 +愕 +愚 +愛 +愜 +感 +愣 +愤 +愧 +愫 +愷 +愿 +慄 +慈 +態 +慌 +慎 +慑 +慕 +慘 +慚 
+慟 +慢 +慣 +慧 +慨 +慫 +慮 +慰 +慳 +慵 +慶 +慷 +慾 +憂 +憊 +憋 +憎 +憐 +憑 +憔 +憚 +憤 +憧 +憨 +憩 +憫 +憬 +憲 +憶 +憾 +懂 +懇 +懈 +應 +懊 +懋 +懑 +懒 +懦 +懲 +懵 +懶 +懷 +懸 +懺 +懼 +懾 +懿 +戀 +戈 +戊 +戌 +戍 +戎 +戏 +成 +我 +戒 +戕 +或 +战 +戚 +戛 +戟 +戡 +戦 +截 +戬 +戮 +戰 +戲 +戳 +戴 +戶 +户 +戸 +戻 +戾 +房 +所 +扁 +扇 +扈 +扉 +手 +才 +扎 +扑 +扒 +打 +扔 +払 +托 +扛 +扣 +扦 +执 +扩 +扪 +扫 +扬 +扭 +扮 +扯 +扰 +扱 +扳 +扶 +批 +扼 +找 +承 +技 +抄 +抉 +把 +抑 +抒 +抓 +投 +抖 +抗 +折 +抚 +抛 +抜 +択 +抟 +抠 +抡 +抢 +护 +报 +抨 +披 +抬 +抱 +抵 +抹 +押 +抽 +抿 +拂 +拄 +担 +拆 +拇 +拈 +拉 +拋 +拌 +拍 +拎 +拐 +拒 +拓 +拔 +拖 +拗 +拘 +拙 +拚 +招 +拜 +拟 +拡 +拢 +拣 +拥 +拦 +拧 +拨 +择 +括 +拭 +拮 +拯 +拱 +拳 +拴 +拷 +拼 +拽 +拾 +拿 +持 +挂 +指 +挈 +按 +挎 +挑 +挖 +挙 +挚 +挛 +挝 +挞 +挟 +挠 +挡 +挣 +挤 +挥 +挨 +挪 +挫 +振 +挲 +挹 +挺 +挽 +挾 +捂 +捅 +捆 +捉 +捋 +捌 +捍 +捎 +捏 +捐 +捕 +捞 +损 +捡 +换 +捣 +捧 +捨 +捩 +据 +捱 +捲 +捶 +捷 +捺 +捻 +掀 +掂 +掃 +掇 +授 +掉 +掌 +掏 +掐 +排 +掖 +掘 +掙 +掛 +掠 +採 +探 +掣 +接 +控 +推 +掩 +措 +掬 +掰 +掲 +掳 +掴 +掷 +掸 +掺 +揀 +揃 +揄 +揆 +揉 +揍 +描 +提 +插 +揖 +揚 +換 +握 +揣 +揩 +揪 +揭 +揮 +援 +揶 +揸 +揹 +揽 +搀 +搁 +搂 +搅 +損 +搏 +搐 +搓 +搔 +搖 +搗 +搜 +搞 +搡 +搪 +搬 +搭 +搵 +搶 +携 +搽 +摀 +摁 +摄 +摆 +摇 +摈 +摊 +摒 +摔 +摘 +摞 +摟 +摧 +摩 +摯 +摳 +摸 +摹 +摺 +摻 +撂 +撃 +撅 +撇 +撈 +撐 +撑 +撒 +撓 +撕 +撚 +撞 +撤 +撥 +撩 +撫 +撬 +播 +撮 +撰 +撲 +撵 +撷 +撸 +撻 +撼 +撿 +擀 +擁 +擂 +擄 +擅 +擇 +擊 +擋 +操 +擎 +擒 +擔 +擘 +據 +擞 +擠 +擡 +擢 +擦 +擬 +擰 +擱 +擲 +擴 +擷 +擺 +擼 +擾 +攀 +攏 +攒 +攔 +攘 +攙 +攜 +攝 +攞 +攢 +攣 +攤 +攥 +攪 +攫 +攬 +支 +收 +攸 +改 +攻 +放 +政 +故 +效 +敌 +敍 +敎 +敏 +救 +敕 +敖 +敗 +敘 +教 +敛 +敝 +敞 +敢 +散 +敦 +敬 +数 +敲 +整 +敵 +敷 +數 +斂 +斃 +文 +斋 +斌 +斎 +斐 +斑 +斓 +斗 +料 +斛 +斜 +斟 +斡 +斤 +斥 +斧 +斩 +斫 +斬 +断 +斯 +新 +斷 +方 +於 +施 +旁 +旃 +旅 +旋 +旌 +旎 +族 +旖 +旗 +无 +既 +日 +旦 +旧 +旨 +早 +旬 +旭 +旮 +旱 +时 +旷 +旺 +旻 +昀 +昂 +昆 +昇 +昉 +昊 +昌 +明 +昏 +易 +昔 +昕 +昙 +星 +映 +春 +昧 +昨 +昭 +是 +昱 +昴 +昵 +昶 +昼 +显 +晁 +時 +晃 +晉 +晋 +晌 +晏 +晒 +晓 +晔 +晕 +晖 +晗 +晚 +晝 +晞 +晟 +晤 +晦 +晨 +晩 +普 +景 +晰 +晴 +晶 +晷 +智 +晾 +暂 +暄 +暇 +暈 +暉 +暌 +暐 +暑 +暖 +暗 +暝 +暢 +暧 +暨 +暫 +暮 +暱 +暴 +暸 +暹 +曄 +曆 +曇 +曉 +曖 +曙 +曜 +曝 +曠 +曦 +曬 +曰 +曲 +曳 +更 +書 +曹 +曼 +曾 +替 +最 +會 +月 +有 +朋 +服 +朐 +朔 +朕 +朗 +望 +朝 +期 +朦 +朧 +木 +未 +末 +本 +札 +朮 +术 +朱 +朴 +朵 +机 +朽 +杀 +杂 +权 +杆 +杈 +杉 +李 +杏 +材 +村 +杓 +杖 +杜 +杞 +束 +杠 +条 +来 +杨 +杭 +杯 +杰 +東 +杳 +杵 +杷 +杼 +松 +板 +极 +构 +枇 +枉 +枋 +析 +枕 +林 +枚 +果 +枝 +枢 +枣 +枪 +枫 +枭 +枯 +枰 +枱 +枳 +架 +枷 +枸 +柄 +柏 +某 +柑 +柒 +染 +柔 +柘 +柚 +柜 +柞 +柠 +柢 +查 +柩 +柬 +柯 +柱 +柳 +柴 +柵 +査 +柿 +栀 +栃 +栄 +栅 +标 +栈 +栉 +栋 +栎 +栏 +树 +栓 +栖 +栗 +校 +栩 +株 +样 +核 +根 +格 +栽 +栾 +桀 +桁 +桂 +桃 +桅 +框 +案 +桉 +桌 +桎 +桐 +桑 +桓 +桔 +桜 +桠 +桡 +桢 +档 +桥 +桦 +桧 +桨 +桩 +桶 +桿 +梁 +梅 +梆 +梏 +梓 +梗 +條 +梟 +梢 +梦 +梧 +梨 +梭 +梯 +械 +梳 +梵 +梶 +检 +棂 +棄 +棉 +棋 +棍 +棒 +棕 +棗 +棘 +棚 +棟 +棠 +棣 +棧 +森 +棱 +棲 +棵 +棹 +棺 +椁 +椅 +椋 +植 +椎 +椒 +検 +椪 +椭 +椰 +椹 +椽 +椿 +楂 +楊 +楓 +楔 +楚 +楝 +楞 +楠 +楣 +楨 +楫 +業 +楮 +極 +楷 +楸 +楹 +楼 +楽 +概 +榄 +榆 +榈 +榉 +榔 +榕 +榖 +榛 +榜 +榨 +榫 +榭 +榮 +榱 +榴 +榷 +榻 +槁 +槃 +構 +槌 +槍 +槎 +槐 +槓 +様 +槛 +槟 +槤 +槭 +槲 +槳 +槻 +槽 +槿 +樁 +樂 +樊 +樑 +樓 +標 +樞 +樟 +模 +樣 +権 +横 +樫 +樯 +樱 +樵 +樸 +樹 +樺 +樽 +樾 +橄 +橇 +橋 +橐 +橘 +橙 +機 +橡 +橢 +橫 +橱 +橹 +橼 +檀 +檄 +檎 +檐 +檔 +檗 +檜 +檢 +檬 +檯 +檳 +檸 +檻 +櫃 +櫚 +櫛 +櫥 +櫸 +櫻 +欄 +權 +欒 +欖 +欠 +次 +欢 +欣 +欧 +欲 +欸 +欺 +欽 +款 +歆 +歇 +歉 +歌 +歎 +歐 +歓 +歙 +歛 +歡 +止 +正 +此 +步 +武 +歧 +歩 +歪 +歯 +歲 +歳 +歴 +歷 +歸 +歹 +死 +歼 +殁 +殃 +殆 +殇 +殉 +殊 +残 +殒 +殓 +殖 +殘 +殞 +殡 +殤 +殭 +殯 +殲 +殴 +段 +殷 +殺 +殼 +殿 +毀 +毁 +毂 +毅 +毆 +毋 +母 +毎 +每 +毒 +毓 +比 +毕 +毗 +毘 +毙 +毛 +毡 +毫 +毯 +毽 +氈 +氏 +氐 +民 +氓 +气 +氖 +気 +氙 +氛 +氟 +氡 +氢 +氣 +氤 +氦 +氧 +氨 +氪 +氫 +氮 +氯 +氰 +氲 +水 +氷 +永 +氹 +氾 +汀 +汁 +求 +汆 +汇 +汉 +汎 +汐 +汕 +汗 +汙 +汛 +汝 +汞 +江 +池 +污 +汤 +汨 +汩 +汪 +汰 +汲 +汴 +汶 +汹 +決 +汽 +汾 +沁 +沂 +沃 +沅 +沈 +沉 +沌 +沏 +沐 +沒 +沓 +沖 +沙 +沛 +沟 +没 +沢 +沣 +沥 +沦 +沧 +沪 +沫 +沭 +沮 +沱 +河 +沸 +油 +治 +沼 +沽 +沾 +沿 +況 +泄 +泉 +泊 +泌 +泓 +法 +泗 +泛 +泞 +泠 +泡 +波 +泣 +泥 +注 +泪 +泫 +泮 +泯 +泰 +泱 +泳 +泵 +泷 +泸 +泻 +泼 +泽 +泾 +洁 +洄 +洋 +洒 +洗 +洙 +洛 +洞 +津 +洩 +洪 +洮 +洱 +洲 +洵 +洶 +洸 +洹 +活 +洼 +洽 +派 +流 +浃 +浄 +浅 +浆 +浇 +浊 +测 +济 +浏 +浑 +浒 +浓 +浔 +浙 +浚 +浜 +浣 +浦 +浩 +浪 +浬 +浮 +浯 +浴 +海 +浸 +涂 +涅 +涇 +消 +涉 +涌 +涎 +涓 +涔 +涕 +涙 +涛 +涝 +涞 +涟 +涠 +涡 +涣 +涤 +润 +涧 +涨 +涩 +涪 +涮 +涯 +液 +涵 +涸 +涼 +涿 +淀 +淄 +淅 
+淆 +淇 +淋 +淌 +淑 +淒 +淖 +淘 +淙 +淚 +淞 +淡 +淤 +淦 +淨 +淩 +淪 +淫 +淬 +淮 +深 +淳 +淵 +混 +淹 +淺 +添 +淼 +清 +済 +渉 +渊 +渋 +渍 +渎 +渐 +渔 +渗 +渙 +渚 +減 +渝 +渠 +渡 +渣 +渤 +渥 +渦 +温 +測 +渭 +港 +渲 +渴 +游 +渺 +渾 +湃 +湄 +湊 +湍 +湖 +湘 +湛 +湟 +湧 +湫 +湮 +湯 +湳 +湾 +湿 +満 +溃 +溅 +溉 +溏 +源 +準 +溜 +溝 +溟 +溢 +溥 +溧 +溪 +溫 +溯 +溱 +溴 +溶 +溺 +溼 +滁 +滂 +滄 +滅 +滇 +滋 +滌 +滑 +滓 +滔 +滕 +滙 +滚 +滝 +滞 +滟 +满 +滢 +滤 +滥 +滦 +滨 +滩 +滬 +滯 +滲 +滴 +滷 +滸 +滾 +滿 +漁 +漂 +漆 +漉 +漏 +漓 +演 +漕 +漠 +漢 +漣 +漩 +漪 +漫 +漬 +漯 +漱 +漲 +漳 +漸 +漾 +漿 +潆 +潇 +潋 +潍 +潑 +潔 +潘 +潛 +潜 +潞 +潟 +潢 +潤 +潦 +潧 +潭 +潮 +潰 +潴 +潸 +潺 +潼 +澀 +澄 +澆 +澈 +澍 +澎 +澗 +澜 +澡 +澤 +澧 +澱 +澳 +澹 +激 +濁 +濂 +濃 +濑 +濒 +濕 +濘 +濛 +濟 +濠 +濡 +濤 +濫 +濬 +濮 +濯 +濱 +濺 +濾 +瀅 +瀆 +瀉 +瀋 +瀏 +瀑 +瀕 +瀘 +瀚 +瀛 +瀝 +瀞 +瀟 +瀧 +瀨 +瀬 +瀰 +瀾 +灌 +灏 +灑 +灘 +灝 +灞 +灣 +火 +灬 +灭 +灯 +灰 +灵 +灶 +灸 +灼 +災 +灾 +灿 +炀 +炁 +炅 +炉 +炊 +炎 +炒 +炔 +炕 +炖 +炙 +炜 +炫 +炬 +炭 +炮 +炯 +炳 +炷 +炸 +点 +為 +炼 +炽 +烁 +烂 +烃 +烈 +烊 +烏 +烘 +烙 +烛 +烟 +烤 +烦 +烧 +烨 +烩 +烫 +烬 +热 +烯 +烷 +烹 +烽 +焉 +焊 +焕 +焖 +焗 +焘 +焙 +焚 +焜 +無 +焦 +焯 +焰 +焱 +然 +焼 +煅 +煉 +煊 +煌 +煎 +煒 +煖 +煙 +煜 +煞 +煤 +煥 +煦 +照 +煨 +煩 +煮 +煲 +煸 +煽 +熄 +熊 +熏 +熒 +熔 +熙 +熟 +熠 +熨 +熬 +熱 +熵 +熹 +熾 +燁 +燃 +燄 +燈 +燉 +燊 +燎 +燒 +燔 +燕 +燙 +燜 +營 +燥 +燦 +燧 +燭 +燮 +燴 +燻 +燼 +燿 +爆 +爍 +爐 +爛 +爪 +爬 +爭 +爰 +爱 +爲 +爵 +父 +爷 +爸 +爹 +爺 +爻 +爽 +爾 +牆 +片 +版 +牌 +牍 +牒 +牙 +牛 +牝 +牟 +牠 +牡 +牢 +牦 +牧 +物 +牯 +牲 +牴 +牵 +特 +牺 +牽 +犀 +犁 +犄 +犊 +犍 +犒 +犢 +犧 +犬 +犯 +状 +犷 +犸 +犹 +狀 +狂 +狄 +狈 +狎 +狐 +狒 +狗 +狙 +狞 +狠 +狡 +狩 +独 +狭 +狮 +狰 +狱 +狸 +狹 +狼 +狽 +猎 +猕 +猖 +猗 +猙 +猛 +猜 +猝 +猥 +猩 +猪 +猫 +猬 +献 +猴 +猶 +猷 +猾 +猿 +獄 +獅 +獎 +獐 +獒 +獗 +獠 +獣 +獨 +獭 +獰 +獲 +獵 +獷 +獸 +獺 +獻 +獼 +獾 +玄 +率 +玉 +王 +玑 +玖 +玛 +玟 +玠 +玥 +玩 +玫 +玮 +环 +现 +玲 +玳 +玷 +玺 +玻 +珀 +珂 +珅 +珈 +珉 +珊 +珍 +珏 +珐 +珑 +珙 +珞 +珠 +珣 +珥 +珩 +珪 +班 +珮 +珲 +珺 +現 +球 +琅 +理 +琇 +琉 +琊 +琍 +琏 +琐 +琛 +琢 +琥 +琦 +琨 +琪 +琬 +琮 +琰 +琲 +琳 +琴 +琵 +琶 +琺 +琼 +瑀 +瑁 +瑄 +瑋 +瑕 +瑗 +瑙 +瑚 +瑛 +瑜 +瑞 +瑟 +瑠 +瑣 +瑤 +瑩 +瑪 +瑯 +瑰 +瑶 +瑾 +璀 +璁 +璃 +璇 +璉 +璋 +璎 +璐 +璜 +璞 +璟 +璧 +璨 +環 +璽 +璿 +瓊 +瓏 +瓒 +瓜 +瓢 +瓣 +瓤 +瓦 +瓮 +瓯 +瓴 +瓶 +瓷 +甄 +甌 +甕 +甘 +甙 +甚 +甜 +生 +產 +産 +甥 +甦 +用 +甩 +甫 +甬 +甭 +甯 +田 +由 +甲 +申 +电 +男 +甸 +町 +画 +甾 +畀 +畅 +界 +畏 +畑 +畔 +留 +畜 +畝 +畢 +略 +畦 +番 +畫 +異 +畲 +畳 +畴 +當 +畸 +畹 +畿 +疆 +疇 +疊 +疏 +疑 +疔 +疖 +疗 +疙 +疚 +疝 +疟 +疡 +疣 +疤 +疥 +疫 +疮 +疯 +疱 +疲 +疳 +疵 +疸 +疹 +疼 +疽 +疾 +痂 +病 +症 +痈 +痉 +痊 +痍 +痒 +痔 +痕 +痘 +痙 +痛 +痞 +痠 +痢 +痣 +痤 +痧 +痨 +痪 +痫 +痰 +痱 +痴 +痹 +痺 +痼 +痿 +瘀 +瘁 +瘋 +瘍 +瘓 +瘘 +瘙 +瘟 +瘠 +瘡 +瘢 +瘤 +瘦 +瘧 +瘩 +瘪 +瘫 +瘴 +瘸 +瘾 +療 +癇 +癌 +癒 +癖 +癜 +癞 +癡 +癢 +癣 +癥 +癫 +癬 +癮 +癱 +癲 +癸 +発 +登 +發 +白 +百 +皂 +的 +皆 +皇 +皈 +皋 +皎 +皑 +皓 +皖 +皙 +皚 +皮 +皰 +皱 +皴 +皺 +皿 +盂 +盃 +盅 +盆 +盈 +益 +盎 +盏 +盐 +监 +盒 +盔 +盖 +盗 +盘 +盛 +盜 +盞 +盟 +盡 +監 +盤 +盥 +盧 +盪 +目 +盯 +盱 +盲 +直 +相 +盹 +盼 +盾 +省 +眈 +眉 +看 +県 +眙 +眞 +真 +眠 +眦 +眨 +眩 +眯 +眶 +眷 +眸 +眺 +眼 +眾 +着 +睁 +睇 +睏 +睐 +睑 +睛 +睜 +睞 +睡 +睢 +督 +睥 +睦 +睨 +睪 +睫 +睬 +睹 +睽 +睾 +睿 +瞄 +瞅 +瞇 +瞋 +瞌 +瞎 +瞑 +瞒 +瞓 +瞞 +瞟 +瞠 +瞥 +瞧 +瞩 +瞪 +瞬 +瞭 +瞰 +瞳 +瞻 +瞼 +瞿 +矇 +矍 +矗 +矚 +矛 +矜 +矢 +矣 +知 +矩 +矫 +短 +矮 +矯 +石 +矶 +矽 +矾 +矿 +码 +砂 +砌 +砍 +砒 +研 +砖 +砗 +砚 +砝 +砣 +砥 +砧 +砭 +砰 +砲 +破 +砷 +砸 +砺 +砼 +砾 +础 +硅 +硐 +硒 +硕 +硝 +硫 +硬 +确 +硯 +硼 +碁 +碇 +碉 +碌 +碍 +碎 +碑 +碓 +碗 +碘 +碚 +碛 +碟 +碣 +碧 +碩 +碰 +碱 +碳 +碴 +確 +碼 +碾 +磁 +磅 +磊 +磋 +磐 +磕 +磚 +磡 +磨 +磬 +磯 +磲 +磷 +磺 +礁 +礎 +礙 +礡 +礦 +礪 +礫 +礴 +示 +礼 +社 +祀 +祁 +祂 +祇 +祈 +祉 +祎 +祐 +祕 +祖 +祗 +祚 +祛 +祜 +祝 +神 +祟 +祠 +祢 +祥 +票 +祭 +祯 +祷 +祸 +祺 +祿 +禀 +禁 +禄 +禅 +禍 +禎 +福 +禛 +禦 +禧 +禪 +禮 +禱 +禹 +禺 +离 +禽 +禾 +禿 +秀 +私 +秃 +秆 +秉 +秋 +种 +科 +秒 +秘 +租 +秣 +秤 +秦 +秧 +秩 +秭 +积 +称 +秸 +移 +秽 +稀 +稅 +程 +稍 +税 +稔 +稗 +稚 +稜 +稞 +稟 +稠 +稣 +種 +稱 +稲 +稳 +稷 +稹 +稻 +稼 +稽 +稿 +穀 +穂 +穆 +穌 +積 +穎 +穗 +穢 +穩 +穫 +穴 +究 +穷 +穹 +空 +穿 +突 +窃 +窄 +窈 +窍 +窑 +窒 +窓 +窕 +窖 +窗 +窘 +窜 +窝 +窟 +窠 +窥 +窦 +窨 +窩 +窪 +窮 +窯 +窺 +窿 +竄 +竅 +竇 +竊 +立 +竖 +站 +竜 +竞 +竟 +章 +竣 +童 +竭 +端 +競 +竹 +竺 +竽 +竿 +笃 +笆 +笈 +笋 +笏 +笑 +笔 +笙 +笛 +笞 +笠 +符 +笨 +第 +笹 +笺 +笼 +筆 +等 +筊 +筋 +筍 +筏 +筐 +筑 +筒 +答 +策 +筛 +筝 +筠 +筱 +筲 +筵 +筷 +筹 +签 +简 +箇 +箋 +箍 +箏 +箐 +箔 +箕 +算 +箝 +管 +箩 +箫 +箭 +箱 +箴 +箸 +節 +篁 +範 +篆 +篇 +築 +篑 +篓 +篙 +篝 +篠 +篡 +篤 +篩 +篪 +篮 +篱 +篷 +簇 +簌 +簍 +簡 +簦 +簧 
+簪 +簫 +簷 +簸 +簽 +簾 +簿 +籁 +籃 +籌 +籍 +籐 +籟 +籠 +籤 +籬 +籮 +籲 +米 +类 +籼 +籽 +粄 +粉 +粑 +粒 +粕 +粗 +粘 +粟 +粤 +粥 +粧 +粪 +粮 +粱 +粲 +粳 +粵 +粹 +粼 +粽 +精 +粿 +糅 +糊 +糍 +糕 +糖 +糗 +糙 +糜 +糞 +糟 +糠 +糧 +糬 +糯 +糰 +糸 +系 +糾 +紀 +紂 +約 +紅 +紉 +紊 +紋 +納 +紐 +紓 +純 +紗 +紘 +紙 +級 +紛 +紜 +素 +紡 +索 +紧 +紫 +紮 +累 +細 +紳 +紹 +紺 +終 +絃 +組 +絆 +経 +結 +絕 +絞 +絡 +絢 +給 +絨 +絮 +統 +絲 +絳 +絵 +絶 +絹 +綁 +綏 +綑 +經 +継 +続 +綜 +綠 +綢 +綦 +綫 +綬 +維 +綱 +網 +綴 +綵 +綸 +綺 +綻 +綽 +綾 +綿 +緊 +緋 +総 +緑 +緒 +緘 +線 +緝 +緞 +締 +緣 +編 +緩 +緬 +緯 +練 +緹 +緻 +縁 +縄 +縈 +縛 +縝 +縣 +縫 +縮 +縱 +縴 +縷 +總 +績 +繁 +繃 +繆 +繇 +繋 +織 +繕 +繚 +繞 +繡 +繩 +繪 +繫 +繭 +繳 +繹 +繼 +繽 +纂 +續 +纍 +纏 +纓 +纔 +纖 +纜 +纠 +红 +纣 +纤 +约 +级 +纨 +纪 +纫 +纬 +纭 +纯 +纰 +纱 +纲 +纳 +纵 +纶 +纷 +纸 +纹 +纺 +纽 +纾 +线 +绀 +练 +组 +绅 +细 +织 +终 +绊 +绍 +绎 +经 +绑 +绒 +结 +绔 +绕 +绘 +给 +绚 +绛 +络 +绝 +绞 +统 +绡 +绢 +绣 +绥 +绦 +继 +绩 +绪 +绫 +续 +绮 +绯 +绰 +绳 +维 +绵 +绶 +绷 +绸 +绻 +综 +绽 +绾 +绿 +缀 +缄 +缅 +缆 +缇 +缈 +缉 +缎 +缓 +缔 +缕 +编 +缘 +缙 +缚 +缜 +缝 +缠 +缢 +缤 +缥 +缨 +缩 +缪 +缭 +缮 +缰 +缱 +缴 +缸 +缺 +缽 +罂 +罄 +罌 +罐 +网 +罔 +罕 +罗 +罚 +罡 +罢 +罩 +罪 +置 +罰 +署 +罵 +罷 +罹 +羁 +羅 +羈 +羊 +羌 +美 +羔 +羚 +羞 +羟 +羡 +羣 +群 +羥 +羧 +羨 +義 +羯 +羲 +羸 +羹 +羽 +羿 +翁 +翅 +翊 +翌 +翎 +習 +翔 +翘 +翟 +翠 +翡 +翦 +翩 +翰 +翱 +翳 +翹 +翻 +翼 +耀 +老 +考 +耄 +者 +耆 +耋 +而 +耍 +耐 +耒 +耕 +耗 +耘 +耙 +耦 +耨 +耳 +耶 +耷 +耸 +耻 +耽 +耿 +聂 +聆 +聊 +聋 +职 +聒 +联 +聖 +聘 +聚 +聞 +聪 +聯 +聰 +聲 +聳 +聴 +聶 +職 +聽 +聾 +聿 +肃 +肄 +肅 +肆 +肇 +肉 +肋 +肌 +肏 +肓 +肖 +肘 +肚 +肛 +肝 +肠 +股 +肢 +肤 +肥 +肩 +肪 +肮 +肯 +肱 +育 +肴 +肺 +肽 +肾 +肿 +胀 +胁 +胃 +胄 +胆 +背 +胍 +胎 +胖 +胚 +胛 +胜 +胝 +胞 +胡 +胤 +胥 +胧 +胫 +胭 +胯 +胰 +胱 +胳 +胴 +胶 +胸 +胺 +能 +脂 +脅 +脆 +脇 +脈 +脉 +脊 +脍 +脏 +脐 +脑 +脓 +脖 +脘 +脚 +脛 +脣 +脩 +脫 +脯 +脱 +脲 +脳 +脸 +脹 +脾 +腆 +腈 +腊 +腋 +腌 +腎 +腐 +腑 +腓 +腔 +腕 +腥 +腦 +腩 +腫 +腭 +腮 +腰 +腱 +腳 +腴 +腸 +腹 +腺 +腻 +腼 +腾 +腿 +膀 +膈 +膊 +膏 +膑 +膘 +膚 +膛 +膜 +膝 +膠 +膦 +膨 +膩 +膳 +膺 +膻 +膽 +膾 +膿 +臀 +臂 +臃 +臆 +臉 +臊 +臍 +臓 +臘 +臟 +臣 +臥 +臧 +臨 +自 +臬 +臭 +至 +致 +臺 +臻 +臼 +臾 +舀 +舂 +舅 +舆 +與 +興 +舉 +舊 +舌 +舍 +舎 +舐 +舒 +舔 +舖 +舗 +舛 +舜 +舞 +舟 +航 +舫 +般 +舰 +舱 +舵 +舶 +舷 +舸 +船 +舺 +舾 +艇 +艋 +艘 +艙 +艦 +艮 +良 +艰 +艱 +色 +艳 +艷 +艹 +艺 +艾 +节 +芃 +芈 +芊 +芋 +芍 +芎 +芒 +芙 +芜 +芝 +芡 +芥 +芦 +芩 +芪 +芫 +芬 +芭 +芮 +芯 +花 +芳 +芷 +芸 +芹 +芻 +芽 +芾 +苁 +苄 +苇 +苋 +苍 +苏 +苑 +苒 +苓 +苔 +苕 +苗 +苛 +苜 +苞 +苟 +苡 +苣 +若 +苦 +苫 +苯 +英 +苷 +苹 +苻 +茁 +茂 +范 +茄 +茅 +茉 +茎 +茏 +茗 +茜 +茧 +茨 +茫 +茬 +茭 +茯 +茱 +茲 +茴 +茵 +茶 +茸 +茹 +茼 +荀 +荃 +荆 +草 +荊 +荏 +荐 +荒 +荔 +荖 +荘 +荚 +荞 +荟 +荠 +荡 +荣 +荤 +荥 +荧 +荨 +荪 +荫 +药 +荳 +荷 +荸 +荻 +荼 +荽 +莅 +莆 +莉 +莊 +莎 +莒 +莓 +莖 +莘 +莞 +莠 +莢 +莧 +莪 +莫 +莱 +莲 +莴 +获 +莹 +莺 +莽 +莿 +菀 +菁 +菅 +菇 +菈 +菊 +菌 +菏 +菓 +菖 +菘 +菜 +菟 +菠 +菡 +菩 +華 +菱 +菲 +菸 +菽 +萁 +萃 +萄 +萊 +萋 +萌 +萍 +萎 +萘 +萝 +萤 +营 +萦 +萧 +萨 +萩 +萬 +萱 +萵 +萸 +萼 +落 +葆 +葉 +著 +葚 +葛 +葡 +董 +葦 +葩 +葫 +葬 +葭 +葯 +葱 +葳 +葵 +葷 +葺 +蒂 +蒋 +蒐 +蒔 +蒙 +蒜 +蒞 +蒟 +蒡 +蒨 +蒲 +蒸 +蒹 +蒻 +蒼 +蒿 +蓁 +蓄 +蓆 +蓉 +蓋 +蓑 +蓓 +蓖 +蓝 +蓟 +蓦 +蓬 +蓮 +蓼 +蓿 +蔑 +蔓 +蔔 +蔗 +蔘 +蔚 +蔡 +蔣 +蔥 +蔫 +蔬 +蔭 +蔵 +蔷 +蔺 +蔻 +蔼 +蔽 +蕁 +蕃 +蕈 +蕉 +蕊 +蕎 +蕙 +蕤 +蕨 +蕩 +蕪 +蕭 +蕲 +蕴 +蕻 +蕾 +薄 +薅 +薇 +薈 +薊 +薏 +薑 +薔 +薙 +薛 +薦 +薨 +薩 +薪 +薬 +薯 +薰 +薹 +藉 +藍 +藏 +藐 +藓 +藕 +藜 +藝 +藤 +藥 +藩 +藹 +藻 +藿 +蘆 +蘇 +蘊 +蘋 +蘑 +蘚 +蘭 +蘸 +蘼 +蘿 +虎 +虏 +虐 +虑 +虔 +處 +虚 +虛 +虜 +虞 +號 +虢 +虧 +虫 +虬 +虱 +虹 +虻 +虽 +虾 +蚀 +蚁 +蚂 +蚊 +蚌 +蚓 +蚕 +蚜 +蚝 +蚣 +蚤 +蚩 +蚪 +蚯 +蚱 +蚵 +蛀 +蛆 +蛇 +蛊 +蛋 +蛎 +蛐 +蛔 +蛙 +蛛 +蛟 +蛤 +蛭 +蛮 +蛰 +蛳 +蛹 +蛻 +蛾 +蜀 +蜂 +蜃 +蜆 +蜇 +蜈 +蜊 +蜍 +蜒 +蜓 +蜕 +蜗 +蜘 +蜚 +蜜 +蜡 +蜢 +蜥 +蜱 +蜴 +蜷 +蜻 +蜿 +蝇 +蝈 +蝉 +蝌 +蝎 +蝕 +蝗 +蝙 +蝟 +蝠 +蝦 +蝨 +蝴 +蝶 +蝸 +蝼 +螂 +螃 +融 +螞 +螢 +螨 +螯 +螳 +螺 +蟀 +蟄 +蟆 +蟋 +蟎 +蟑 +蟒 +蟠 +蟬 +蟲 +蟹 +蟻 +蟾 +蠅 +蠍 +蠔 +蠕 +蠛 +蠟 +蠡 +蠢 +蠣 +蠱 +蠶 +蠹 +蠻 +血 +衄 +衅 +衆 +行 +衍 +術 +衔 +街 +衙 +衛 +衝 +衞 +衡 +衢 +衣 +补 +表 +衩 +衫 +衬 +衮 +衰 +衲 +衷 +衹 +衾 +衿 +袁 +袂 +袄 +袅 +袈 +袋 +袍 +袒 +袖 +袜 +袞 +袤 +袪 +被 +袭 +袱 +裁 +裂 +装 +裆 +裊 +裏 +裔 +裕 +裘 +裙 +補 +裝 +裟 +裡 +裤 +裨 +裱 +裳 +裴 +裸 +裹 +製 +裾 +褂 +複 +褐 +褒 +褓 +褔 +褚 +褥 +褪 +褫 +褲 +褶 +褻 +襁 +襄 +襟 +襠 +襪 +襬 +襯 +襲 +西 +要 +覃 +覆 +覇 +見 +規 +覓 +視 +覚 +覦 +覧 +親 +覬 +観 +覷 +覺 +覽 +觀 +见 +观 +规 +觅 +视 +览 +觉 +觊 +觎 +觐 +觑 +角 +觞 +解 +觥 +触 +觸 +言 +訂 +計 +訊 +討 +訓 +訕 +訖 +託 +記 +訛 +訝 +訟 +訣 +訥 +訪 +設 +許 +訳 +訴 +訶 +診 +註 +証 +詆 +詐 +詔 
+評 +詛 +詞 +詠 +詡 +詢 +詣 +試 +詩 +詫 +詬 +詭 +詮 +詰 +話 +該 +詳 +詹 +詼 +誅 +誇 +誉 +誌 +認 +誓 +誕 +誘 +語 +誠 +誡 +誣 +誤 +誥 +誦 +誨 +說 +説 +読 +誰 +課 +誹 +誼 +調 +諄 +談 +請 +諏 +諒 +論 +諗 +諜 +諡 +諦 +諧 +諫 +諭 +諮 +諱 +諳 +諷 +諸 +諺 +諾 +謀 +謁 +謂 +謄 +謊 +謎 +謐 +謔 +謗 +謙 +講 +謝 +謠 +謨 +謬 +謹 +謾 +譁 +證 +譎 +譏 +識 +譙 +譚 +譜 +警 +譬 +譯 +議 +譲 +譴 +護 +譽 +讀 +變 +讓 +讚 +讞 +计 +订 +认 +讥 +讧 +讨 +让 +讪 +讫 +训 +议 +讯 +记 +讲 +讳 +讴 +讶 +讷 +许 +讹 +论 +讼 +讽 +设 +访 +诀 +证 +诃 +评 +诅 +识 +诈 +诉 +诊 +诋 +词 +诏 +译 +试 +诗 +诘 +诙 +诚 +诛 +话 +诞 +诟 +诠 +诡 +询 +诣 +诤 +该 +详 +诧 +诩 +诫 +诬 +语 +误 +诰 +诱 +诲 +说 +诵 +诶 +请 +诸 +诺 +读 +诽 +课 +诿 +谀 +谁 +调 +谄 +谅 +谆 +谈 +谊 +谋 +谌 +谍 +谎 +谏 +谐 +谑 +谒 +谓 +谔 +谕 +谗 +谘 +谙 +谚 +谛 +谜 +谟 +谢 +谣 +谤 +谥 +谦 +谧 +谨 +谩 +谪 +谬 +谭 +谯 +谱 +谲 +谴 +谶 +谷 +豁 +豆 +豇 +豈 +豉 +豊 +豌 +豎 +豐 +豔 +豚 +象 +豢 +豪 +豫 +豬 +豹 +豺 +貂 +貅 +貌 +貓 +貔 +貘 +貝 +貞 +負 +財 +貢 +貧 +貨 +販 +貪 +貫 +責 +貯 +貰 +貳 +貴 +貶 +買 +貸 +費 +貼 +貽 +貿 +賀 +賁 +賂 +賃 +賄 +資 +賈 +賊 +賑 +賓 +賜 +賞 +賠 +賡 +賢 +賣 +賤 +賦 +質 +賬 +賭 +賴 +賺 +購 +賽 +贅 +贈 +贊 +贍 +贏 +贓 +贖 +贛 +贝 +贞 +负 +贡 +财 +责 +贤 +败 +账 +货 +质 +贩 +贪 +贫 +贬 +购 +贮 +贯 +贰 +贱 +贲 +贴 +贵 +贷 +贸 +费 +贺 +贻 +贼 +贾 +贿 +赁 +赂 +赃 +资 +赅 +赈 +赊 +赋 +赌 +赎 +赏 +赐 +赓 +赔 +赖 +赘 +赚 +赛 +赝 +赞 +赠 +赡 +赢 +赣 +赤 +赦 +赧 +赫 +赭 +走 +赳 +赴 +赵 +赶 +起 +趁 +超 +越 +趋 +趕 +趙 +趟 +趣 +趨 +足 +趴 +趵 +趸 +趺 +趾 +跃 +跄 +跆 +跋 +跌 +跎 +跑 +跖 +跚 +跛 +距 +跟 +跡 +跤 +跨 +跩 +跪 +路 +跳 +践 +跷 +跹 +跺 +跻 +踉 +踊 +踌 +踏 +踐 +踝 +踞 +踟 +踢 +踩 +踪 +踮 +踱 +踴 +踵 +踹 +蹂 +蹄 +蹇 +蹈 +蹉 +蹊 +蹋 +蹑 +蹒 +蹙 +蹟 +蹣 +蹤 +蹦 +蹩 +蹬 +蹭 +蹲 +蹴 +蹶 +蹺 +蹼 +蹿 +躁 +躇 +躉 +躊 +躋 +躍 +躏 +躪 +身 +躬 +躯 +躲 +躺 +軀 +車 +軋 +軌 +軍 +軒 +軟 +転 +軸 +軼 +軽 +軾 +較 +載 +輒 +輓 +輔 +輕 +輛 +輝 +輟 +輩 +輪 +輯 +輸 +輻 +輾 +輿 +轄 +轅 +轆 +轉 +轍 +轎 +轟 +车 +轧 +轨 +轩 +转 +轭 +轮 +软 +轰 +轲 +轴 +轶 +轻 +轼 +载 +轿 +较 +辄 +辅 +辆 +辇 +辈 +辉 +辊 +辍 +辐 +辑 +输 +辕 +辖 +辗 +辘 +辙 +辛 +辜 +辞 +辟 +辣 +辦 +辨 +辩 +辫 +辭 +辮 +辯 +辰 +辱 +農 +边 +辺 +辻 +込 +辽 +达 +迁 +迂 +迄 +迅 +过 +迈 +迎 +运 +近 +返 +还 +这 +进 +远 +违 +连 +迟 +迢 +迤 +迥 +迦 +迩 +迪 +迫 +迭 +述 +迴 +迷 +迸 +迹 +迺 +追 +退 +送 +适 +逃 +逅 +逆 +选 +逊 +逍 +透 +逐 +递 +途 +逕 +逗 +這 +通 +逛 +逝 +逞 +速 +造 +逢 +連 +逮 +週 +進 +逵 +逶 +逸 +逻 +逼 +逾 +遁 +遂 +遅 +遇 +遊 +運 +遍 +過 +遏 +遐 +遑 +遒 +道 +達 +違 +遗 +遙 +遛 +遜 +遞 +遠 +遢 +遣 +遥 +遨 +適 +遭 +遮 +遲 +遴 +遵 +遶 +遷 +選 +遺 +遼 +遽 +避 +邀 +邁 +邂 +邃 +還 +邇 +邈 +邊 +邋 +邏 +邑 +邓 +邕 +邛 +邝 +邢 +那 +邦 +邨 +邪 +邬 +邮 +邯 +邰 +邱 +邳 +邵 +邸 +邹 +邺 +邻 +郁 +郅 +郊 +郎 +郑 +郜 +郝 +郡 +郢 +郤 +郦 +郧 +部 +郫 +郭 +郴 +郵 +郷 +郸 +都 +鄂 +鄉 +鄒 +鄔 +鄙 +鄞 +鄢 +鄧 +鄭 +鄰 +鄱 +鄲 +鄺 +酉 +酊 +酋 +酌 +配 +酐 +酒 +酗 +酚 +酝 +酢 +酣 +酥 +酩 +酪 +酬 +酮 +酯 +酰 +酱 +酵 +酶 +酷 +酸 +酿 +醃 +醇 +醉 +醋 +醍 +醐 +醒 +醚 +醛 +醜 +醞 +醣 +醪 +醫 +醬 +醮 +醯 +醴 +醺 +釀 +釁 +采 +釉 +释 +釋 +里 +重 +野 +量 +釐 +金 +釗 +釘 +釜 +針 +釣 +釦 +釧 +釵 +鈀 +鈉 +鈍 +鈎 +鈔 +鈕 +鈞 +鈣 +鈦 +鈪 +鈴 +鈺 +鈾 +鉀 +鉄 +鉅 +鉉 +鉑 +鉗 +鉚 +鉛 +鉤 +鉴 +鉻 +銀 +銃 +銅 +銑 +銓 +銖 +銘 +銜 +銬 +銭 +銮 +銳 +銷 +銹 +鋁 +鋅 +鋒 +鋤 +鋪 +鋰 +鋸 +鋼 +錄 +錐 +錘 +錚 +錠 +錢 +錦 +錨 +錫 +錮 +錯 +録 +錳 +錶 +鍊 +鍋 +鍍 +鍛 +鍥 +鍰 +鍵 +鍺 +鍾 +鎂 +鎊 +鎌 +鎏 +鎔 +鎖 +鎗 +鎚 +鎧 +鎬 +鎮 +鎳 +鏈 +鏖 +鏗 +鏘 +鏞 +鏟 +鏡 +鏢 +鏤 +鏽 +鐘 +鐮 +鐲 +鐳 +鐵 +鐸 +鐺 +鑄 +鑊 +鑑 +鑒 +鑣 +鑫 +鑰 +鑲 +鑼 +鑽 +鑾 +鑿 +针 +钉 +钊 +钎 +钏 +钒 +钓 +钗 +钙 +钛 +钜 +钝 +钞 +钟 +钠 +钡 +钢 +钣 +钤 +钥 +钦 +钧 +钨 +钩 +钮 +钯 +钰 +钱 +钳 +钴 +钵 +钺 +钻 +钼 +钾 +钿 +铀 +铁 +铂 +铃 +铄 +铅 +铆 +铉 +铎 +铐 +铛 +铜 +铝 +铠 +铡 +铢 +铣 +铤 +铨 +铩 +铬 +铭 +铮 +铰 +铲 +铵 +银 +铸 +铺 +链 +铿 +销 +锁 +锂 +锄 +锅 +锆 +锈 +锉 +锋 +锌 +锏 +锐 +锑 +错 +锚 +锟 +锡 +锢 +锣 +锤 +锥 +锦 +锭 +键 +锯 +锰 +锲 +锵 +锹 +锺 +锻 +镀 +镁 +镂 +镇 +镉 +镌 +镍 +镐 +镑 +镕 +镖 +镗 +镛 +镜 +镣 +镭 +镯 +镰 +镳 +镶 +長 +长 +門 +閃 +閉 +開 +閎 +閏 +閑 +閒 +間 +閔 +閘 +閡 +関 +閣 +閥 +閨 +閩 +閱 +閲 +閹 +閻 +閾 +闆 +闇 +闊 +闌 +闍 +闔 +闕 +闖 +闘 +關 +闡 +闢 +门 +闪 +闫 +闭 +问 +闯 +闰 +闲 +间 +闵 +闷 +闸 +闹 +闺 +闻 +闽 +闾 +阀 +阁 +阂 +阅 +阆 +阇 +阈 +阉 +阎 +阐 +阑 +阔 +阕 +阖 +阙 +阚 +阜 +队 +阡 +阪 +阮 +阱 +防 +阳 +阴 +阵 +阶 +阻 +阿 +陀 +陂 +附 +际 +陆 +陇 +陈 +陋 +陌 +降 +限 +陕 +陛 +陝 +陞 +陟 +陡 +院 +陣 +除 +陨 +险 +陪 +陰 +陲 +陳 +陵 +陶 +陷 +陸 +険 +陽 +隅 +隆 +隈 +隊 +隋 +隍 +階 +随 +隐 +隔 +隕 +隘 +隙 +際 +障 +隠 +隣 +隧 +隨 +險 +隱 +隴 +隶 +隸 +隻 +隼 +隽 +难 +雀 +雁 +雄 +雅 +集 +雇 +雉 +雋 +雌 +雍 +雎 +雏 +雑 +雒 +雕 +雖 +雙 +雛 +雜 +雞 +離 +難 +雨 +雪 +雯 +雰 +雲 +雳 +零 +雷 +雹 +電 +雾 +需 +霁 +霄 +霆 +震 +霈 +霉 +霊 +霍 
+霎 +霏 +霑 +霓 +霖 +霜 +霞 +霧 +霭 +霰 +露 +霸 +霹 +霽 +霾 +靂 +靄 +靈 +青 +靓 +靖 +静 +靚 +靛 +靜 +非 +靠 +靡 +面 +靥 +靦 +革 +靳 +靴 +靶 +靼 +鞅 +鞋 +鞍 +鞏 +鞑 +鞘 +鞠 +鞣 +鞦 +鞭 +韆 +韋 +韌 +韓 +韜 +韦 +韧 +韩 +韬 +韭 +音 +韵 +韶 +韻 +響 +頁 +頂 +頃 +項 +順 +須 +頌 +預 +頑 +頒 +頓 +頗 +領 +頜 +頡 +頤 +頫 +頭 +頰 +頷 +頸 +頹 +頻 +頼 +顆 +題 +額 +顎 +顏 +顔 +願 +顛 +類 +顧 +顫 +顯 +顱 +顴 +页 +顶 +顷 +项 +顺 +须 +顼 +顽 +顾 +顿 +颁 +颂 +预 +颅 +领 +颇 +颈 +颉 +颊 +颌 +颍 +颐 +频 +颓 +颔 +颖 +颗 +题 +颚 +颛 +颜 +额 +颞 +颠 +颡 +颢 +颤 +颦 +颧 +風 +颯 +颱 +颳 +颶 +颼 +飄 +飆 +风 +飒 +飓 +飕 +飘 +飙 +飚 +飛 +飞 +食 +飢 +飨 +飩 +飪 +飯 +飲 +飼 +飽 +飾 +餃 +餅 +餉 +養 +餌 +餐 +餒 +餓 +餘 +餚 +餛 +餞 +餡 +館 +餮 +餵 +餾 +饅 +饈 +饋 +饌 +饍 +饑 +饒 +饕 +饗 +饞 +饥 +饨 +饪 +饬 +饭 +饮 +饯 +饰 +饱 +饲 +饴 +饵 +饶 +饷 +饺 +饼 +饽 +饿 +馀 +馁 +馄 +馅 +馆 +馈 +馋 +馍 +馏 +馒 +馔 +首 +馗 +香 +馥 +馨 +馬 +馭 +馮 +馳 +馴 +駁 +駄 +駅 +駆 +駐 +駒 +駕 +駛 +駝 +駭 +駱 +駿 +騁 +騎 +騏 +験 +騙 +騨 +騰 +騷 +驀 +驅 +驊 +驍 +驒 +驕 +驗 +驚 +驛 +驟 +驢 +驥 +马 +驭 +驮 +驯 +驰 +驱 +驳 +驴 +驶 +驷 +驸 +驹 +驻 +驼 +驾 +驿 +骁 +骂 +骄 +骅 +骆 +骇 +骈 +骊 +骋 +验 +骏 +骐 +骑 +骗 +骚 +骛 +骜 +骞 +骠 +骡 +骤 +骥 +骧 +骨 +骯 +骰 +骶 +骷 +骸 +骼 +髂 +髅 +髋 +髏 +髒 +髓 +體 +髖 +高 +髦 +髪 +髮 +髯 +髻 +鬃 +鬆 +鬍 +鬓 +鬚 +鬟 +鬢 +鬣 +鬥 +鬧 +鬱 +鬼 +魁 +魂 +魄 +魅 +魇 +魍 +魏 +魔 +魘 +魚 +魯 +魷 +鮑 +鮨 +鮪 +鮭 +鮮 +鯉 +鯊 +鯖 +鯛 +鯨 +鯰 +鯽 +鰍 +鰓 +鰭 +鰲 +鰻 +鰾 +鱈 +鱉 +鱔 +鱗 +鱷 +鱸 +鱼 +鱿 +鲁 +鲈 +鲍 +鲑 +鲛 +鲜 +鲟 +鲢 +鲤 +鲨 +鲫 +鲱 +鲲 +鲶 +鲷 +鲸 +鳃 +鳄 +鳅 +鳌 +鳍 +鳕 +鳖 +鳗 +鳝 +鳞 +鳥 +鳩 +鳳 +鳴 +鳶 +鴉 +鴕 +鴛 +鴦 +鴨 +鴻 +鴿 +鵑 +鵜 +鵝 +鵡 +鵬 +鵰 +鵲 +鶘 +鶩 +鶯 +鶴 +鷗 +鷲 +鷹 +鷺 +鸚 +鸞 +鸟 +鸠 +鸡 +鸢 +鸣 +鸥 +鸦 +鸨 +鸪 +鸭 +鸯 +鸳 +鸵 +鸽 +鸾 +鸿 +鹂 +鹃 +鹄 +鹅 +鹈 +鹉 +鹊 +鹌 +鹏 +鹑 +鹕 +鹘 +鹜 +鹞 +鹤 +鹦 +鹧 +鹫 +鹭 +鹰 +鹳 +鹵 +鹹 +鹼 +鹽 +鹿 +麂 +麋 +麒 +麓 +麗 +麝 +麟 +麥 +麦 +麩 +麴 +麵 +麸 +麺 +麻 +麼 +麽 +麾 +黃 +黄 +黍 +黎 +黏 +黑 +黒 +黔 +默 +黛 +黜 +黝 +點 +黠 +黨 +黯 +黴 +鼋 +鼎 +鼐 +鼓 +鼠 +鼬 +鼹 +鼻 +鼾 +齁 +齊 +齋 +齐 +齒 +齡 +齢 +齣 +齦 +齿 +龄 +龅 +龈 +龊 +龋 +龌 +龍 +龐 +龔 +龕 +龙 +龚 +龛 +龜 +龟 +︰ +︱ +︶ +︿ +﹁ +﹂ +﹍ +﹏ +﹐ +﹑ +﹒ +﹔ +﹕ +﹖ +﹗ +﹙ +﹚ +﹝ +﹞ +﹡ +﹣ +! +" +# +$ +% +& +' +( +) +* ++ +, +- +. +/ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +: +; +< += +> +? +@ +[ +\ +] +^ +_ +` +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +q +r +s +t +u +v +w +x +y +z +{ +| +} +~ +。 +「 +」 +、 +・ +ッ +ー +イ +ク +シ +ス +ト +ノ +フ +ラ +ル +ン +゙ +゚ + ̄ +¥ +👍 +🔥 +😂 +😎 +... 
+yam +10 +2017 +12 +11 +2016 +20 +30 +15 +06 +lofter +##s +2015 +by +16 +14 +18 +13 +24 +17 +2014 +21 +##0 +22 +19 +25 +23 +com +100 +00 +05 +2013 +##a +03 +09 +08 +28 +##2 +50 +01 +04 +##1 +27 +02 +2012 +##3 +26 +##e +07 +##8 +##5 +##6 +##4 +##9 +##7 +29 +2011 +40 +##t +2010 +##o +##d +##i +2009 +##n +app +www +the +##m +31 +##c +##l +##y +##r +##g +2008 +60 +http +200 +qq +##p +80 +##f +google +pixnet +90 +cookies +tripadvisor +500 +##er +##k +35 +##h +facebook +2007 +2000 +70 +##b +of +##x +##u +45 +300 +iphone +32 +1000 +2006 +48 +ip +36 +in +38 +3d +##w +##ing +55 +ctrip +##on +##v +33 +##の +to +34 +400 +id +2005 +it +37 +windows +llc +top +99 +42 +39 +000 +led +at +##an +41 +51 +52 +46 +49 +43 +53 +44 +##z +android +58 +and +59 +2004 +56 +vr +##か +5000 +2003 +47 +blogthis +twitter +54 +##le +150 +ok +2018 +57 +75 +cn +no +ios +##in +##mm +##00 +800 +on +te +3000 +65 +2001 +360 +95 +ig +lv +120 +##ng +##を +##us +##に +pc +てす +── +600 +##te +85 +2002 +88 +##ed +html +ncc +wifi +email +64 +blog +is +##10 +##て +mail +online +##al +dvd +##ic +studio +##は +##℃ +##ia +##と +line +vip +72 +##q +98 +##ce +##en +for +##is +##ra +##es +##j +usb +net +cp +1999 +asia +4g +##cm +diy +new +3c +##お +ta +66 +language +vs +apple +tw +86 +web +##ne +ipad +62 +you +##re +101 +68 +##tion +ps +de +bt +pony +atm +##2017 +1998 +67 +##ch +ceo +##or +go +##na +av +pro +cafe +96 +pinterest +97 +63 +pixstyleme3c +##ta +more +said +##2016 +1997 +mp3 +700 +##ll +nba +jun +##20 +92 +tv +1995 +pm +61 +76 +nbsp +250 +##ie +linux +##ma +cd +110 +hd +##17 +78 +##ion +77 +6000 +am +##th +##st +94 +##se +##et +69 +180 +gdp +my +105 +81 +abc +89 +flash +79 +one +93 +1990 +1996 +##ck +gps +##も +##ly +web885 +106 +2020 +91 +##ge +4000 +1500 +xd +boss +isbn +1994 +org +##ry +me +love +##11 +0fork +73 +##12 +3g +##ter +##ar +71 +82 +##la +hotel +130 +1970 +pk +83 +87 +140 +ie +##os +##30 +##el +74 +##50 +seo +cpu +##ml +p2p +84 +may +##る +sun +tue +internet +cc +posted +youtube +##at +##ン +##man +ii +##ル +##15 +abs +nt +pdf +yahoo +ago +1980 +##it +news +mac +104 +##てす +##me +##り +java +1992 +spa +##de +##nt +hk +all +plus +la +1993 +##mb +##16 +##ve +west +##da +160 +air +##い +##ps +から +##to +1989 +logo +htc +php +https +fi +momo +##son +sat +##ke +##80 +ebd +suv +wi +day +apk +##88 +##um +mv +galaxy +wiki +or +brake +##ス +1200 +する +this +1991 +mon +##こ +❤2017 +po +##ない +javascript +life +home +june +##ss +system +900 +##ー +##0 +pp +1988 +world +fb +4k +br +##as +ic +ai +leonardo +safari +##60 +live +free +xx +wed +win7 +kiehl +##co +lg +o2o +##go +us +235 +1949 +mm +しい +vfm +kanye +##90 +##2015 +##id +jr +##ey +123 +rss +##sa +##ro +##am +##no +thu +fri +350 +##sh +##ki +103 +comments +name +##のて +##pe +##ine +max +1987 +8000 +uber +##mi +##ton +wordpress +office +1986 +1985 +##ment +107 +bd +win10 +##ld +##li +gmail +bb +dior +##rs +##ri +##rd +##ます +up +cad +##® +dr +して +read +##21 +をお +##io +##99 +url +1984 +pvc +paypal +show +policy +##40 +##ty +##18 +with +##★ +##01 +txt +102 +##ba +dna +from +post +mini +ar +taiwan +john +##ga +privacy +agoda +##13 +##ny +word +##24 +##22 +##by +##ur +##hz +1982 +##ang +265 +cookie +netscape +108 +##ka +##~ +##ad +house +share +note +ibm +code +hello +nike +sim +survey +##016 +1979 +1950 +wikia +##32 +##017 +5g +cbc +##tor +##kg +1983 +##rt +##14 +campaign +store +2500 +os +##ct +##ts +##° +170 +api +##ns +365 +excel +##な +##ao +##ら +##し +~~ +##nd +university +163 +には +518 +##70 +##ya +##il +##25 +pierre +ipo +0020 +897 +##23 +hotels +##ian +のお +125 +years +6606 +##ers +##26 +high 
+##day +time +##ay +bug +##line +##く +##す +##be +xp +talk2yam +yamservice +10000 +coco +##dy +sony +##ies +1978 +microsoft +david +people +##ha +1960 +instagram +intel +その +##ot +iso +1981 +##va +115 +##mo +##land +xxx +man +co +ltxsw +##ation +baby +220 +##pa +##ol +1945 +7000 +tag +450 +##ue +msn +##31 +oppo +##ト +##ca +control +##om +st +chrome +##ure +##ん +be +##き +lol +##19 +した +##bo +240 +lady +##100 +##way +##から +4600 +##ko +##do +##un +4s +corporation +168 +##ni +herme +##28 +cp +978 +##up +##06 +ui +##ds +ppt +admin +three +します +bbc +re +128 +##48 +ca +##015 +##35 +hp +##ee +tpp +##た +##ive +×× +root +##cc +##ました +##ble +##ity +adobe +park +114 +et +oled +city +##ex +##ler +##ap +china +##book +20000 +view +##ice +global +##km +your +hong +##mg +out +##ms +ng +ebay +##29 +menu +ubuntu +##cy +rom +##view +open +ktv +do +server +##lo +if +english +##ね +##5 +##oo +1600 +##02 +step1 +kong +club +135 +july +inc +1976 +mr +hi +##net +touch +##ls +##ii +michael +lcd +##05 +##33 +phone +james +step2 +1300 +ios9 +##box +dc +##2 +##ley +samsung +111 +280 +pokemon +css +##ent +##les +いいえ +##1 +s8 +atom +play +bmw +##said +sa +etf +ctrl +♥yoyo♥ +##55 +2025 +##2014 +##66 +adidas +amazon +1958 +##ber +##ner +visa +##77 +##der +1800 +connectivity +##hi +firefox +109 +118 +hr +so +style +mark +pop +ol +skip +1975 +as +##27 +##ir +##61 +190 +mba +##う +##ai +le +##ver +1900 +cafe2017 +lte +super +113 +129 +##ron +amd +like +##☆ +are +##ster +we +##sk +paul +data +international +##ft +longchamp +ssd +good +##ート +##ti +reply +##my +↓↓↓ +apr +star +##ker +source +136 +js +112 +get +force +photo +##one +126 +##2013 +##ow +link +bbs +1972 +goods +##lin +python +119 +##ip +game +##ics +##ません +blue +##● +520 +##45 +page +itunes +##03 +1955 +260 +1968 +gt +gif +618 +##ff +##47 +group +くたさい +about +bar +ganji +##nce +music +lee +not +1977 +1971 +1973 +##per +an +faq +comment +##って +days +##ock +116 +##bs +1974 +1969 +v1 +player +1956 +xbox +sql +fm +f1 +139 +##ah +210 +##lv +##mp +##000 +melody +1957 +##3 +550 +17life +199 +1966 +xml +market +##au +##71 +999 +##04 +what +gl +##95 +##age +tips +##68 +book +##ting +mysql +can +1959 +230 +##ung +wonderland +watch +10℃ +##ction +9000 +mar +mobile +1946 +1962 +article +##db +part +▲top +party +って +1967 +1964 +1948 +##07 +##ore +##op +この +dj +##78 +##38 +010 +main +225 +1965 +##ong +art +320 +ad +134 +020 +##73 +117 +pm2 +japan +228 +##08 +ts +1963 +##ica +der +sm +##36 +2019 +##wa +ct +##7 +##や +##64 +1937 +homemesh +search +##85 +##れは +##tv +##di +macbook +##9 +##くたさい +service +##♥ +type +った +750 +##ier +##si +##75 +##います +##ok +best +##ット +goris +lock +##った +cf +3m +big +##ut +ftp +carol +##vi +10 +1961 +happy +sd +##ac +122 +anti +pe +cnn +iii +1920 +138 +##ラ +1940 +esp +jan +tags +##98 +##51 +august +vol +##86 +154 +##™ +##fs +##れ +##sion +design +ac +##ム +press +jordan +ppp +that +key +check +##6 +##tt +##㎡ +1080p +##lt +power +##42 +1952 +##bc +vivi +##ック +he +133 +121 +jpg +##rry +201 +175 +3500 +1947 +nb +##ted +##rn +しています +1954 +usd +##t00 +master +##ンク +001 +model +##58 +al +##09 +1953 +##34 +ram +goo +ても +##ui +127 +1930 +red +##ary +rpg +item +##pm +##41 +270 +##za +project +##2012 +hot +td +blogabstract +##ger +##62 +650 +##44 +gr2 +##します +##m +black +electronic +nfc +year +asus +また +html5 +cindy +##hd +m3 +132 +esc +##od +booking +##53 +fed +tvb +##81 +##ina +mit +165 +##いる +chan +192 +distribution +next +になる +peter +bios +steam +cm +1941 +にも +pk10 +##ix +##65 +##91 +dec +nasa +##ana +icecat +00z +b1 +will +##46 +li +se +##ji +##み +##ard +oct 
+##ain +jp +##ze +##bi +cio +##56 +smart +h5 +##39 +##port +curve +vpn +##nm +##dia +utc +##あり +12345678910 +##52 +rmvb +chanel +a4 +miss +##and +##im +media +who +##63 +she +girl +5s +124 +vera +##して +class +vivo +king +##フ +##ei +national +ab +1951 +5cm +888 +145 +ipod +ap +1100 +5mm +211 +ms +2756 +##69 +mp4 +msci +##po +##89 +131 +mg +index +380 +##bit +##out +##zz +##97 +##67 +158 +apec +##8 +photoshop +opec +¥799 +ては +##96 +##tes +##ast +2g +○○ +##ール +¥2899 +##ling +##よ +##ory +1938 +##ical +kitty +content +##43 +step3 +##cn +win8 +155 +vc +1400 +iphone7 +robert +##した +tcl +137 +beauty +##87 +en +dollars +##ys +##oc +step +pay +yy +a1 +##2011 +##lly +##ks +##♪ +1939 +188 +download +1944 +sep +exe +ph +います +school +gb +center +pr +street +##board +uv +##37 +##lan +winrar +##que +##ua +##com +1942 +1936 +480 +gpu +##4 +ettoday +fu +tom +##54 +##ren +##via +149 +##72 +b2b +144 +##79 +##tch +rose +arm +mb +##49 +##ial +##nn +nvidia +step4 +mvp +00㎡ +york +156 +##イ +how +cpi +591 +2765 +gov +kg +joe +##xx +mandy +pa +##ser +copyright +fashion +1935 +don +##け +ecu +##ist +##art +erp +wap +have +##lm +talk +##ek +##ning +##if +ch +##ite +video +1943 +cs +san +iot +look +##84 +##2010 +##ku +october +##ux +trump +##hs +##ide +box +141 +first +##ins +april +##ight +##83 +185 +angel +protected +aa +151 +162 +x1 +m2 +##fe +##× +##ho +size +143 +min +ofo +fun +gomaji +ex +hdmi +food +dns +march +chris +kevin +##のか +##lla +##pp +##ec +ag +ems +6s +720p +##rm +##ham +off +##92 +asp +team +fandom +ed +299 +▌♥ +##ell +info +されています +##82 +sina +4066 +161 +##able +##ctor +330 +399 +315 +dll +rights +ltd +idc +jul +3kg +1927 +142 +ma +surface +##76 +##ク +~~~ +304 +mall +eps +146 +green +##59 +map +space +donald +v2 +sodu +##light +1931 +148 +1700 +まて +310 +reserved +htm +##han +##57 +2d +178 +mod +##ise +##tions +152 +ti +##shi +doc +1933 +icp +055 +wang +##ram +shopping +aug +##pi +##well +now +wam +b2 +からお +##hu +236 +1928 +##gb +266 +f2 +##93 +153 +mix +##ef +##uan +bwl +##plus +##res +core +##ess +tea +5℃ +hktvmall +nhk +##ate +list +##ese +301 +feb +4m +inn +ての +nov +159 +12345 +daniel +##ci +pass +##bet +##nk +coffee +202 +ssl +airbnb +##ute +fbi +woshipm +skype +ea +cg +sp +##fc +##www +yes +edge +alt +007 +##94 +fpga +##ght +##gs +iso9001 +さい +##ile +##wood +##uo +image +lin +icon +american +##em +1932 +set +says +##king +##tive +blogger +##74 +なと +256 +147 +##ox +##zy +##red +##ium +##lf +nokia +claire +##リ +##ding +november +lohas +##500 +##tic +##マ +##cs +##ある +##che +##ire +##gy +##ult +db +january +win +##カ +166 +road +ptt +##ま +##つ +198 +##fa +##mer +anna +pchome +はい +udn +ef +420 +##time +##tte +2030 +##ア +g20 +white +かかります +1929 +308 +garden +eleven +di +##おります +chen +309b +777 +172 +young +cosplay +ちてない +4500 +bat +##123 +##tra +##ては +kindle +npc +steve +etc +##ern +##| +call +xperia +ces +travel +sk +s7 +##ous +1934 +##int +みいたたけます +183 +edu +file +cho +qr +##car +##our +186 +##ant +##d +eric +1914 +rends +##jo +##する +mastercard +##2000 +kb +##min +290 +##ino +vista +##ris +##ud +jack +2400 +##set +169 +pos +1912 +##her +##ou +taipei +しく +205 +beta +##ませんか +232 +##fi +express +255 +body +##ill +aphojoy +user +december +meiki +##ick +tweet +richard +##av +##ᆫ +iphone6 +##dd +ちてすか +views +##mark +321 +pd +##00 +times +##▲ +level +##ash +10g +point +5l +##ome +208 +koreanmall +##ak +george +q2 +206 +wma +tcp +##200 +スタッフ +full +mlb +##lle +##watch +tm +run +179 +911 +smith +business +##und +1919 +color +##tal +222 +171 +##less +moon +4399 +##rl +update +pcb +shop +499 +157 +little +なし 
+end +##mhz +van +dsp +easy +660 +##house +##key +history +##o +oh +##001 +##hy +##web +oem +let +was +##2009 +##gg +review +##wan +182 +##°c +203 +uc +title +##val +united +233 +2021 +##ons +doi +trivago +overdope +sbs +##ance +##ち +grand +special +573032185 +imf +216 +wx17house +##so +##ーム +audi +##he +london +william +##rp +##ake +science +beach +cfa +amp +ps4 +880 +##800 +##link +##hp +crm +ferragamo +bell +make +##eng +195 +under +zh +photos +2300 +##style +##ント +via +176 +da +##gi +company +i7 +##ray +thomas +370 +ufo +i5 +##max +plc +ben +back +research +8g +173 +mike +##pc +##ッフ +september +189 +##ace +vps +february +167 +pantos +wp +lisa +1921 +★★ +jquery +night +long +offer +##berg +##news +1911 +##いて +ray +fks +wto +せます +over +164 +340 +##all +##rus +1924 +##888 +##works +blogtitle +loftpermalink +##→ +187 +martin +test +ling +km +##め +15000 +fda +v3 +##ja +##ロ +wedding +かある +outlet +family +##ea +をこ +##top +story +##ness +salvatore +##lu +204 +swift +215 +room +している +oracle +##ul +1925 +sam +b2c +week +pi +rock +##のは +##a +##けと +##ean +##300 +##gle +cctv +after +chinese +##back +powered +x2 +##tan +1918 +##nes +##イン +canon +only +181 +##zi +##las +say +##oe +184 +##sd +221 +##bot +##world +##zo +sky +made +top100 +just +1926 +pmi +802 +234 +gap +##vr +177 +les +174 +▲topoct +ball +vogue +vi +ing +ofweek +cos +##list +##ort +▲topmay +##なら +##lon +として +last +##tc +##of +##bus +##gen +real +eva +##コ +a3 +nas +##lie +##ria +##coin +##bt +▲topapr +his +212 +cat +nata +vive +health +⋯⋯ +drive +sir +▲topmar +du +cup +##カー +##ook +##よう +##sy +alex +msg +tour +しました +3ce +##word +193 +ebooks +r8 +block +318 +##より +2200 +nice +pvp +207 +months +1905 +rewards +##ther +1917 +0800 +##xi +##チ +##sc +micro +850 +gg +blogfp +op +1922 +daily +m1 +264 +true +##bb +ml +##tar +##のお +##ky +anthony +196 +253 +##yo +state +218 +##ara +##aa +##rc +##tz +##ston +より +gear +##eo +##ade +ge +see +1923 +##win +##ura +ss +heart +##den +##ita +down +##sm +el +png +2100 +610 +rakuten +whatsapp +bay +dream +add +##use +680 +311 +pad +gucci +mpv +##ode +##fo +island +▲topjun +##▼ +223 +jason +214 +chicago +##❤ +しの +##hone +io +##れる +##ことか +sogo +be2 +##ology +990 +cloud +vcd +##con +2~3 +##ford +##joy +##kb +##こさいます +##rade +but +##ach +docker +##ful +rfid +ul +##ase +hit +ford +##star +580 +##○ +11 +a2 +sdk +reading +edited +##are +cmos +##mc +238 +siri +light +##ella +##ため +bloomberg +##read +pizza +##ison +jimmy +##vm +college +node +journal +ba +18k +##play +245 +##cer +20 +magic +##yu +191 +jump +288 +tt +##ings +asr +##lia +3200 +step5 +network +##cd +mc +いします +1234 +pixstyleme +273 +##600 +2800 +money +★★★★★ +1280 +12 +430 +bl +みの +act +##tus +tokyo +##rial +##life +emba +##ae +saas +tcs +##rk +##wang +summer +##sp +ko +##ving +390 +premium +##その +netflix +##ヒ +uk +mt +##lton +right +frank +two +209 +える +##ple +##cal +021 +##んな +##sen +##ville +hold +nexus +dd +##ius +てお +##mah +##なく +tila +zero +820 +ce +##tin +resort +##ws +charles +old +p10 +5d +report +##360 +##ru +##には +bus +vans +lt +##est +pv +##レ +links +rebecca +##ツ +##dm +azure +##365 +きな +limited +bit +4gb +##mon +1910 +moto +##eam +213 +1913 +var +eos +なとの +226 +blogspot +された +699 +e3 +dos +dm +fc +##ments +##ik +##kw +boy +##bin +##ata +960 +er +##せ +219 +##vin +##tu +##ula +194 +##∥ +station +##ろ +##ature +835 +files +zara +hdr +top10 +nature +950 +magazine +s6 +marriott +##シ +avira +case +##っと +tab +##ran +tony +##home +oculus +im +##ral +jean +saint +cry +307 +rosie +##force +##ini +ice +##bert +のある +##nder +##mber +pet +2600 +##◆ +plurk 
+▲topdec +##sis +00kg +▲topnov +720 +##ence +tim +##ω +##nc +##ても +##name +log +ips +great +ikea +malaysia +unix +##イト +3600 +##ncy +##nie +12000 +akb48 +##ye +##oid +404 +##chi +##いた +oa +xuehai +##1000 +##orm +##rf +275 +さん +##ware +##リー +980 +ho +##pro +text +##era +560 +bob +227 +##ub +##2008 +8891 +scp +avi +##zen +2022 +mi +wu +museum +qvod +apache +lake +jcb +▲topaug +★★★ +ni +##hr +hill +302 +ne +weibo +490 +ruby +##ーシ +##ヶ +##row +4d +▲topjul +iv +##ish +github +306 +mate +312 +##スト +##lot +##ane +andrew +のハイト +##tina +t1 +rf +ed2k +##vel +##900 +way +final +りの +ns +5a +705 +197 +##メ +sweet +bytes +##ene +▲topjan +231 +##cker +##2007 +##px +100g +topapp +229 +helpapp +rs +low +14k +g4g +care +630 +ldquo +あり +##fork +leave +rm +edition +##gan +##zon +##qq +▲topsep +##google +##ism +gold +224 +explorer +##zer +toyota +category +select +visual +##labels +restaurant +##md +posts +s1 +##ico +もっと +angelababy +123456 +217 +sports +s3 +mbc +1915 +してくたさい +shell +x86 +candy +##new +kbs +face +xl +470 +##here +4a +swissinfo +v8 +▲topfeb +dram +##ual +##vice +3a +##wer +sport +q1 +ios10 +public +int +card +##c +ep +au +rt +##れた +1080 +bill +##mll +kim +30 +460 +wan +##uk +##ミ +x3 +298 +0t +scott +##ming +239 +e5 +##3d +h7n9 +worldcat +brown +##あります +##vo +##led +##580 +##ax +249 +410 +##ert +paris +##~6 +polo +925 +##lr +599 +##ナ +capital +##hing +bank +cv +1g +##chat +##s +##たい +adc +##ule +2m +##e +digital +hotmail +268 +##pad +870 +bbq +quot +##ring +before +wali +##まて +mcu +2k +2b +という +costco +316 +north +333 +switch +##city +##p +philips +##mann +management +panasonic +##cl +##vd +##ping +##rge +alice +##lk +##ましょう +css3 +##ney +vision +alpha +##ular +##400 +##tter +lz +にお +##ありません +mode +gre +1916 +pci +##tm +237 +1~2 +##yan +##そ +について +##let +##キ +work +war +coach +ah +mary +##ᅵ +huang +##pt +a8 +pt +follow +##berry +1895 +##ew +a5 +ghost +##ション +##wn +##og +south +##code +girls +##rid +action +villa +git +r11 +table +games +##cket +error +##anonymoussaid +##ag +here +##ame +##gc +qa +##■ +##lis +gmp +##gin +vmalife +##cher +yu +wedding +##tis +demo +dragon +530 +soho +social +bye +##rant +river +orz +acer +325 +##↑ +##ース +##ats +261 +del +##ven +440 +ups +##ように +##ター +305 +value +macd +yougou +##dn +661 +##ano +ll +##urt +##rent +continue +script +##wen +##ect +paper +263 +319 +shift +##chel +##フト +##cat +258 +x5 +fox +243 +##さん +car +aaa +##blog +loading +##yn +##tp +kuso +799 +si +sns +イカせるテンマ +ヒンクテンマ3 +rmb +vdc +forest +central +prime +help +ultra +##rmb +##ような +241 +square +688 +##しい +のないフロクに +##field +##reen +##ors +##ju +c1 +start +510 +##air +##map +cdn +##wo +cba +stephen +m8 +100km +##get +opera +##base +##ood +vsa +com™ +##aw +##ail +251 +なのて +count +t2 +##ᅡ +##een +2700 +hop +##gp +vsc +tree +##eg +##ose +816 +285 +##ories +##shop +alphago +v4 +1909 +simon +##ᆼ +fluke62max +zip +スホンサー +##sta +louis +cr +bas +##~10 +bc +##yer +hadoop +##ube +##wi +1906 +0755 +hola +##low +place +centre +5v +d3 +##fer +252 +##750 +##media +281 +540 +0l +exchange +262 +series +##ハー +##san +eb +##bank +##k +q3 +##nge +##mail +take +##lp +259 +1888 +client +east +cache +event +vincent +##ールを +きを +##nse +sui +855 +adchoice +##и +##stry +##なたの +246 +##zone +ga +apps +sea +##ab +248 +cisco +##タ +##rner +kymco +##care +dha +##pu +##yi +minkoff +royal +p1 +への +annie +269 +collection +kpi +playstation +257 +になります +866 +bh +##bar +queen +505 +radio +1904 +andy +armani +##xy +manager +iherb +##ery +##share +spring +raid +johnson +1908 +##ob +volvo +hall +##ball +v6 +our +taylor +##hk +bi +242 +##cp 
+kate +bo +water +technology +##rie +サイトは +277 +##ona +##sl +hpv +303 +gtx +hip +rdquo +jayz +stone +##lex +##rum +namespace +##やり +620 +##ale +##atic +des +##erson +##ql +##ves +##type +enter +##この +##てきます +d2 +##168 +##mix +##bian +との +a9 +jj +ky +##lc +access +movie +##hc +リストに +tower +##ration +##mit +ます +##nch +ua +tel +prefix +##o2 +1907 +##point +1901 +ott +~10 +##http +##ury +baidu +##ink +member +##logy +bigbang +nownews +##js +##shot +##tb +##こと +247 +eba +##tics +##lus +ける +v5 +spark +##ama +there +##ions +god +##lls +##down +hiv +##ress +burberry +day2 +##kv +◆◆ +jeff +related +film +edit +joseph +283 +##ark +cx +32gb +order +g9 +30000 +##ans +##tty +s5 +##bee +かあります +thread +xr +buy +sh +005 +land +spotify +mx +##ari +276 +##verse +×email +sf +why +##ことて +244 +7headlines +nego +sunny +dom +exo +401 +666 +positioning +fit +rgb +##tton +278 +kiss +alexa +adam +lp +みリストを +##g +mp +##ties +##llow +amy +##du +np +002 +institute +271 +##rth +##lar +2345 +590 +##des +sidebar +15 +imax +site +##cky +##kit +##ime +##009 +season +323 +##fun +##ンター +##ひ +gogoro +a7 +pu +lily +fire +twd600 +##ッセーシを +いて +##vis +30ml +##cture +##をお +information +##オ +close +friday +##くれる +yi +nick +てすか +##tta +##tel +6500 +##lock +cbd +economy +254 +かお +267 +tinker +double +375 +8gb +voice +##app +oops +channel +today +985 +##right +raw +xyz +##+ +jim +edm +##cent +7500 +supreme +814 +ds +##its +##asia +dropbox +##てすか +##tti +books +272 +100ml +##tle +##ller +##ken +##more +##boy +sex +309 +##dom +t3 +##ider +##なります +##unch +1903 +810 +feel +5500 +##かった +##put +により +s2 +mo +##gh +men +ka +amoled +div +##tr +##n1 +port +howard +##tags +ken +dnf +##nus +adsense +##а +ide +##へ +buff +thunder +##town +##ique +has +##body +auto +pin +##erry +tee +てした +295 +number +##the +##013 +object +psp +cool +udnbkk +16gb +##mic +miui +##tro +most +r2 +##alk +##nity +1880 +±0 +##いました +428 +s4 +law +version +##oa +n1 +sgs +docomo +##tf +##ack +henry +fc2 +##ded +##sco +##014 +##rite +286 +0mm +linkedin +##ada +##now +wii +##ndy +ucbug +##◎ +sputniknews +legalminer +##ika +##xp +2gb +##bu +q10 +oo +b6 +come +##rman +cheese +ming +maker +##gm +nikon +##fig +ppi +kelly +##ります +jchere +てきます +ted +md +003 +fgo +tech +##tto +dan +soc +##gl +##len +hair +earth +640 +521 +img +##pper +##a1 +##てきる +##ロク +acca +##ition +##ference +suite +##ig +outlook +##mond +##cation +398 +##pr +279 +101vip +358 +##999 +282 +64gb +3800 +345 +airport +##over +284 +##おり +jones +##ith +lab +##su +##いるのて +co2 +town +piece +##llo +no1 +vmware +24h +##qi +focus +reader +##admin +##ora +tb +false +##log +1898 +know +lan +838 +##ces +f4 +##ume +motel +stop +##oper +na +flickr +netcomponents +##af +##─ +pose +williams +local +##ound +##cg +##site +##iko +いお +274 +5m +gsm +con +##ath +1902 +friends +##hip +cell +317 +##rey +780 +cream +##cks +012 +##dp +facebooktwitterpinterestgoogle +sso +324 +shtml +song +swiss +##mw +##キンク +lumia +xdd +string +tiffany +522 +marc +られた +insee +russell +sc +dell +##ations +ok +camera +289 +##vs +##flow +##late +classic +287 +##nter +stay +g1 +mtv +512 +##ever +##lab +##nger +qe +sata +ryan +d1 +50ml +cms +##cing +su +292 +3300 +editor +296 +##nap +security +sunday +association +##ens +##700 +##bra +acg +##かり +sofascore +とは +mkv +##ign +jonathan +gary +build +labels +##oto +tesla +moba +qi +gohappy +general +ajax +1024 +##かる +サイト +society +##test +##urs +wps +fedora +##ich +mozilla +328 +##480 +##dr +usa +urn +##lina +##r +grace +##die +##try +##ader +1250 +##なり +elle +570 +##chen +##ᆯ +price +##ten +uhz +##ough +eq +##hen 
+states +push +session +balance +wow +506 +##cus +##py +when +##ward +##ep +34e +wong +library +prada +##サイト +##cle +running +##ree +313 +ck +date +q4 +##ctive +##ool +##> +mk +##ira +##163 +388 +die +secret +rq +dota +buffet +は1ヶ +e6 +##ez +pan +368 +ha +##card +##cha +2a +##さ +alan +day3 +eye +f3 +##end +france +keep +adi +rna +tvbs +##ala +solo +nova +##え +##tail +##ょう +support +##ries +##なる +##ved +base +copy +iis +fps +##ways +hero +hgih +profile +fish +mu +ssh +entertainment +chang +##wd +click +cake +##ond +pre +##tom +kic +pixel +##ov +##fl +product +6a +##pd +dear +##gate +es +yumi +audio +##² +##sky +echo +bin +where +##ture +329 +##ape +find +sap +isis +##なと +nand +##101 +##load +##ream +band +a6 +525 +never +##post +festival +50cm +##we +555 +guide +314 +zenfone +##ike +335 +gd +forum +jessica +strong +alexander +##ould +software +allen +##ious +program +360° +else +lohasthree +##gar +することかてきます +please +##れます +rc +##ggle +##ric +bim +50000 +##own +eclipse +355 +brian +3ds +##side +061 +361 +##other +##ける +##tech +##ator +485 +engine +##ged +##t +plaza +##fit +cia +ngo +westbrook +shi +tbs +50mm +##みませんか +sci +291 +reuters +##ily +contextlink +##hn +af +##cil +bridge +very +##cel +1890 +cambridge +##ize +15g +##aid +##data +790 +frm +##head +award +butler +##sun +meta +##mar +america +ps3 +puma +pmid +##すか +lc +670 +kitchen +##lic +オーフン5 +きなしソフトサーヒス +そして +day1 +future +★★★★ +##text +##page +##rris +pm1 +##ket +fans +##っています +1001 +christian +bot +kids +trackback +##hai +c3 +display +##hl +n2 +1896 +idea +さんも +##sent +airmail +##ug +##men +pwm +けます +028 +##lution +369 +852 +awards +schemas +354 +asics +wikipedia +font +##tional +##vy +c2 +293 +##れている +##dget +##ein +っている +contact +pepper +スキル +339 +##~5 +294 +##uel +##ument +730 +##hang +みてす +q5 +##sue +rain +##ndi +wei +swatch +##cept +わせ +331 +popular +##ste +##tag +p2 +501 +trc +1899 +##west +##live +justin +honda +ping +messenger +##rap +v9 +543 +##とは +unity +appqq +はすへて +025 +leo +##tone +##テ +##ass +uniqlo +##010 +502 +her +jane +memory +moneydj +##tical +human +12306 +していると +##m2 +coc +miacare +##mn +tmt +##core +vim +kk +##may +fan +target +use +too +338 +435 +2050 +867 +737 +fast +##2c +services +##ope +omega +energy +##わ +pinkoi +1a +##なから +##rain +jackson +##ement +##シャンルの +374 +366 +そんな +p9 +rd +##ᆨ +1111 +##tier +##vic +zone +##│ +385 +690 +dl +isofix +cpa +m4 +322 +kimi +めて +davis +##lay +lulu +##uck +050 +weeks +qs +##hop +920 +##n +ae +##ear +~5 +eia +405 +##fly +korea +jpeg +boost +##ship +small +##リア +1860 +eur +297 +425 +valley +##iel +simple +##ude +rn +k2 +##ena +されます +non +patrick +しているから +##ナー +feed +5757 +30g +process +well +qqmei +##thing +they +aws +lu +pink +##ters +##kin +または +board +##vertisement +wine +##ien +unicode +##dge +r1 +359 +##tant +いを +##twitter +##3c +cool1 +される +##れて +##l +isp +##012 +standard +45㎡2 +402 +##150 +matt +##fu +326 +##iner +googlemsn +pixnetfacebookyahoo +##ラン +x7 +886 +##uce +メーカー +sao +##ev +##きました +##file +9678 +403 +xddd +shirt +6l +##rio +##hat +3mm +givenchy +ya +bang +##lio +monday +crystal +ロクイン +##abc +336 +head +890 +ubuntuforumwikilinuxpastechat +##vc +##~20 +##rity +cnc +7866 +ipv6 +null +1897 +##ost +yang +imsean +tiger +##fet +##ンス +352 +##= +dji +327 +ji +maria +##come +##んて +foundation +3100 +##beth +##なった +1m +601 +active +##aft +##don +3p +sr +349 +emma +##khz +living +415 +353 +1889 +341 +709 +457 +sas +x6 +##face +pptv +x4 +##mate +han +sophie +##jing +337 +fifa +##mand +other +sale +inwedding +##gn +てきちゃいます +##mmy +##pmlast +bad +nana +nbc +してみてくたさいね 
+なとはお +##wu +##かあります +##あ +note7 +single +##340 +せからこ +してくたさい♪この +しにはとんとんワークケートを +するとあなたにもっとマッチした +ならワークケートへ +もみつかっちゃうかも +ワークケートの +##bel +window +##dio +##ht +union +age +382 +14 +##ivity +##y +コメント +domain +neo +##isa +##lter +5k +f5 +steven +##cts +powerpoint +tft +self +g2 +ft +##テル +zol +##act +mwc +381 +343 +もう +nbapop +408 +てある +eds +ace +##room +previous +author +tomtom +il +##ets +hu +financial +☆☆☆ +っています +bp +5t +chi +1gb +##hg +fairmont +cross +008 +gay +h2 +function +##けて +356 +also +1b +625 +##ータ +##raph +1894 +3~5 +##ils +i3 +334 +avenue +##host +による +##bon +##tsu +message +navigation +50g +fintech +h6 +##ことを +8cm +##ject +##vas +##firm +credit +##wf +xxxx +form +##nor +##space +huawei +plan +json +sbl +##dc +machine +921 +392 +wish +##120 +##sol +windows7 +edward +##ために +development +washington +##nsis +lo +818 +##sio +##ym +##bor +planet +##~8 +##wt +ieee +gpa +##めて +camp +ann +gm +##tw +##oka +connect +##rss +##work +##atus +wall +chicken +soul +2mm +##times +fa +##ather +##cord +009 +##eep +hitachi +gui +harry +##pan +e1 +disney +##press +##ーション +wind +386 +frigidaire +##tl +liu +hsu +332 +basic +von +ev +いた +てきる +スホンサーサイト +learning +##ull +expedia +archives +change +##wei +santa +cut +ins +6gb +turbo +brand +cf1 +508 +004 +return +747 +##rip +h1 +##nis +##をこ +128gb +##にお +3t +application +しており +emc +rx +##oon +384 +quick +412 +15058 +wilson +wing +chapter +##bug +beyond +##cms +##dar +##oh +zoom +e2 +trip +sb +##nba +rcep +342 +aspx +ci +080 +gc +gnu +める +##count +advanced +dance +dv +##url +##ging +367 +8591 +am09 +shadow +battle +346 +##i +##cia +##という +emily +##のてす +##tation +host +ff +techorz +sars +##mini +##mporary +##ering +nc +4200 +798 +##next +cma +##mbps +##gas +##ift +##dot +##ィ +455 +##~17 +amana +##りの +426 +##ros +ir +00㎡1 +##eet +##ible +##↓ +710 +ˋ▽ˊ +##aka +dcs +iq +##v +l1 +##lor +maggie +##011 +##iu +588 +##~1 +830 +##gt +1tb +articles +create +##burg +##iki +database +fantasy +##rex +##cam +dlc +dean +##you +hard +path +gaming +victoria +maps +cb +##lee +##itor +overchicstoretvhome +systems +##xt +416 +p3 +sarah +760 +##nan +407 +486 +x9 +install +second +626 +##ann +##ph +##rcle +##nic +860 +##nar +ec +##とう +768 +metro +chocolate +##rian +~4 +##table +##しています +skin +##sn +395 +mountain +##0mm +inparadise +6m +7x24 +ib +4800 +##jia +eeworld +creative +g5 +g3 +357 +parker +ecfa +village +からの +18000 +sylvia +サーヒス +hbl +##ques +##onsored +##x2 +##きます +##v4 +##tein +ie6 +383 +##stack +389 +ver +##ads +##baby +sound +bbe +##110 +##lone +##uid +ads +022 +gundam +351 +thinkpad +006 +scrum +match +##ave +mems +##470 +##oy +##なりました +##talk +glass +lamigo +span +##eme +job +##a5 +jay +wade +kde +498 +##lace +ocean +tvg +##covery +##r3 +##ners +##rea +junior +think +##aine +cover +##ision +##sia +↓↓ +##bow +msi +413 +458 +406 +##love +711 +801 +soft +z2 +##pl +456 +1840 +mobil +mind +##uy +427 +nginx +##oi +めた +##rr +6221 +##mple +##sson +##ーシてす +371 +##nts +91tv +comhd +crv3000 +##uard +1868 +397 +deep +lost +field +gallery +##bia +rate +spf +redis +traction +930 +icloud +011 +なら +fe +jose +372 +##tory +into +sohu +fx +899 +379 +kicstart2 +##hia +すく +##~3 +##sit +ra +24 +##walk +##xure +500g +##pact +pacific +xa +natural +carlo +##250 +##walker +1850 +##can +cto +gigi +516 +##サー +pen +##hoo +ob +matlab +##b +##yy +13913459 +##iti +mango +##bbs +sense +c5 +oxford +##ニア +walker +jennifer +##ola +course +##bre +701 +##pus +##rder +lucky +075 +##ぁ +ivy +なお +##nia +sotheby +side +##ugh +joy +##orage +##ush +##bat +##dt +364 +r9 +##2d +##gio +511 +country +wear 
+##lax +##~7 +##moon +393 +seven +study +411 +348 +lonzo +8k +##ェ +evolution +##イフ +##kk +gs +kd +##レス +arduino +344 +b12 +##lux +arpg +##rdon +cook +##x5 +dark +five +##als +##ida +とても +sign +362 +##ちの +something +20mm +##nda +387 +##posted +fresh +tf +1870 +422 +cam +##mine +##skip +##form +##ssion +education +394 +##tee +dyson +stage +##jie +want +##night +epson +pack +あります +##ppy +テリヘル +##█ +wd +##eh +##rence +left +##lvin +golden +mhz +discovery +##trix +##n2 +loft +##uch +##dra +##sse +speed +~1 +1mdb +sorry +welcome +##urn +wave +gaga +##lmer +teddy +##160 +トラックハック +せよ +611 +##f2016 +378 +rp +##sha +rar +##あなたに +##きた +840 +holiday +##ュー +373 +074 +##vg +##nos +##rail +gartner +gi +6p +##dium +kit +488 +b3 +eco +##ろう +20g +sean +##stone +autocad +nu +##np +f16 +write +029 +m5 +##ias +images +atp +##dk +fsm +504 +1350 +ve +52kb +##xxx +##のに +##cake +414 +unit +lim +ru +1v +##ification +published +angela +16g +analytics +ak +##q +##nel +gmt +##icon +again +##₂ +##bby +ios11 +445 +かこさいます +waze +いてす +##ハ +9985 +##ust +##ティー +framework +##007 +iptv +delete +52sykb +cl +wwdc +027 +30cm +##fw +##ての +1389 +##xon +brandt +##ses +##dragon +tc +vetements +anne +monte +modern +official +##へて +##ere +##nne +##oud +もちろん +50 +etnews +##a2 +##graphy +421 +863 +##ちゃん +444 +##rtex +##てお +l2 +##gma +mount +ccd +たと +archive +morning +tan +ddos +e7 +##ホ +day4 +##ウ +gis +453 +its +495 +factory +bruce +pg +##ito +ってくたさい +guest +cdma +##lling +536 +n3 +しかし +3~4 +mega +eyes +ro +13 +women +dac +church +##jun +singapore +##facebook +6991 +starbucks +##tos +##stin +##shine +zen +##mu +tina +20℃ +1893 +##たけて +503 +465 +request +##gence +qt +##っ +1886 +347 +363 +q7 +##zzi +diary +##tore +409 +##ead +468 +cst +##osa +canada +agent +va +##jiang +##ちは +##ーク +##lam +sg +##nix +##sday +##よって +g6 +##master +bing +##zl +charlie +16 +8mm +nb40 +##ーン +thai +##ルフ +ln284ct +##itz +##2f +bonnie +##food +##lent +originals +##stro +##lts +418 +∟∣ +##bscribe +children +ntd +yesstyle +##かも +hmv +##tment +d5 +2cm +arts +sms +##pn +##я +##いい +topios9 +539 +lifestyle +virtual +##ague +xz +##deo +muji +024 +unt +##nnis +##ᅩ +faq1 +1884 +396 +##ette +fly +64㎡ +はしめまして +441 +curry +##pop +のこ +release +##← +##◆◆ +##cast +073 +ありな +500ml +##ews +5c +##stle +ios7 +##ima +787 +dog +lenovo +##r4 +roger +013 +cbs +vornado +100m +417 +##desk +##クok +##ald +1867 +9595 +2900 +##van +oil +##x +some +break +common +##jy +##lines +g7 +twice +419 +ella +nano +belle +にこ +##mes +##self +##note +jb +##ことかてきます +benz +##との +##ova +451 +save +##wing +##ますのて +kai +りは +##hua +##rect +rainer +##unge +448 +##0m +adsl +##かな +guestname +##uma +##kins +##zu +tokichoi +##price +county +##med +##mus +rmk +391 +address +vm +えて +openload +##group +##hin +##iginal +amg +urban +##oz +jobs +emi +##public +beautiful +##sch +album +##dden +##bell +jerry +works +hostel +miller +##drive +##rmin +##10 +376 +boot +828 +##370 +##fx +##cm~ +1885 +##nome +##ctionary +##oman +##lish +##cr +##hm +433 +##how +432 +francis +xi +c919 +b5 +evernote +##uc +vga +##3000 +coupe +##urg +##cca +##uality +019 +6g +れる +multi +##また +##ett +em +hey +##ani +##tax +##rma +inside +than +740 +leonnhurt +##jin +ict +れた +bird +notes +200mm +くの +##dical +##lli +result +442 +iu +ee +438 +smap +gopro +##last +yin +pure +998 +32g +けた +5kg +##dan +##rame +mama +##oot +bean +marketing +##hur +2l +bella +sync +xuite +##ground +515 +discuz +##getrelax +##ince +##bay +##5s +cj +##イス +gmat +apt +##pass +jing +##rix +c4 +rich +##とても +niusnews +##ello +bag +770 +##eting +##mobile +18 +culture +015 +##のてすか 
+377 +1020 +area +##ience +616 +details +gp +universal +silver +dit +はお +private +ddd +u11 +kanshu +##ified +fung +##nny +dx +##520 +tai +475 +023 +##fr +##lean +3s +##pin +429 +##rin +25000 +ly +rick +##bility +usb3 +banner +##baru +##gion +metal +dt +vdf +1871 +karl +qualcomm +bear +1010 +oldid +ian +jo +##tors +population +##ernel +1882 +mmorpg +##mv +##bike +603 +##© +ww +friend +##ager +exhibition +##del +##pods +fpx +structure +##free +##tings +kl +##rley +##copyright +##mma +california +3400 +orange +yoga +4l +canmake +honey +##anda +##コメント +595 +nikkie +##ルハイト +dhl +publishing +##mall +##gnet +20cm +513 +##クセス +##┅ +e88 +970 +##dog +fishbase +##! +##" +### +##$ +##% +##& +##' +##( +##) +##* +##+ +##, +##- +##. +##/ +##: +##; +##< +##= +##> +##? +##@ +##[ +##\ +##] +##^ +##_ +##{ +##| +##} +##~ +##£ +##¤ +##¥ +##§ +##« +##± +##³ +##µ +##· +##¹ +##º +##» +##¼ +##ß +##æ +##÷ +##ø +##đ +##ŋ +##ɔ +##ə +##ɡ +##ʰ +##ˇ +##ˈ +##ˊ +##ˋ +##ˍ +##ː +##˙ +##˚ +##ˢ +##α +##β +##γ +##δ +##ε +##η +##θ +##ι +##κ +##λ +##μ +##ν +##ο +##π +##ρ +##ς +##σ +##τ +##υ +##φ +##χ +##ψ +##б +##в +##г +##д +##е +##ж +##з +##к +##л +##м +##н +##о +##п +##р +##с +##т +##у +##ф +##х +##ц +##ч +##ш +##ы +##ь +##і +##ا +##ب +##ة +##ت +##د +##ر +##س +##ع +##ل +##م +##ن +##ه +##و +##ي +##۩ +##ก +##ง +##น +##ม +##ย +##ร +##อ +##า +##เ +##๑ +##་ +##ღ +##ᄀ +##ᄁ +##ᄂ +##ᄃ +##ᄅ +##ᄆ +##ᄇ +##ᄈ +##ᄉ +##ᄋ +##ᄌ +##ᄎ +##ᄏ +##ᄐ +##ᄑ +##ᄒ +##ᅢ +##ᅣ +##ᅥ +##ᅦ +##ᅧ +##ᅨ +##ᅪ +##ᅬ +##ᅭ +##ᅮ +##ᅯ +##ᅲ +##ᅳ +##ᅴ +##ᆷ +##ᆸ +##ᆺ +##ᆻ +##ᗜ +##ᵃ +##ᵉ +##ᵍ +##ᵏ +##ᵐ +##ᵒ +##ᵘ +##‖ +##„ +##† +##• +##‥ +##‧ +##
 +##‰ +##′ +##″ +##‹ +##› +##※ +##‿ +##⁄ +##ⁱ +##⁺ +##ⁿ +##₁ +##₃ +##₄ +##€ +##№ +##ⅰ +##ⅱ +##ⅲ +##ⅳ +##ⅴ +##↔ +##↗ +##↘ +##⇒ +##∀ +##− +##∕ +##∙ +##√ +##∞ +##∟ +##∠ +##∣ +##∩ +##∮ +##∶ +##∼ +##∽ +##≈ +##≒ +##≡ +##≤ +##≥ +##≦ +##≧ +##≪ +##≫ +##⊙ +##⋅ +##⋈ +##⋯ +##⌒ +##① +##② +##③ +##④ +##⑤ +##⑥ +##⑦ +##⑧ +##⑨ +##⑩ +##⑴ +##⑵ +##⑶ +##⑷ +##⑸ +##⒈ +##⒉ +##⒊ +##⒋ +##ⓒ +##ⓔ +##ⓘ +##━ +##┃ +##┆ +##┊ +##┌ +##└ +##├ +##┣ +##═ +##║ +##╚ +##╞ +##╠ +##╭ +##╮ +##╯ +##╰ +##╱ +##╳ +##▂ +##▃ +##▅ +##▇ +##▉ +##▋ +##▌ +##▍ +##▎ +##□ +##▪ +##▫ +##▬ +##△ +##▶ +##► +##▽ +##◇ +##◕ +##◠ +##◢ +##◤ +##☀ +##☕ +##☞ +##☺ +##☼ +##♀ +##♂ +##♠ +##♡ +##♣ +##♦ +##♫ +##♬ +##✈ +##✔ +##✕ +##✖ +##✦ +##✨ +##✪ +##✰ +##✿ +##❀ +##➜ +##➤ +##⦿ +##、 +##。 +##〃 +##々 +##〇 +##〈 +##〉 +##《 +##》 +##「 +##」 +##『 +##』 +##【 +##】 +##〓 +##〔 +##〕 +##〖 +##〗 +##〜 +##〝 +##〞 +##ぃ +##ぇ +##ぬ +##ふ +##ほ +##む +##ゃ +##ゅ +##ゆ +##ょ +##゜ +##ゝ +##ァ +##ゥ +##エ +##ォ +##ケ +##サ +##セ +##ソ +##ッ +##ニ +##ヌ +##ネ +##ノ +##ヘ +##モ +##ャ +##ヤ +##ュ +##ユ +##ョ +##ヨ +##ワ +##ヲ +##・ +##ヽ +##ㄅ +##ㄆ +##ㄇ +##ㄉ +##ㄋ +##ㄌ +##ㄍ +##ㄎ +##ㄏ +##ㄒ +##ㄚ +##ㄛ +##ㄞ +##ㄟ +##ㄢ +##ㄤ +##ㄥ +##ㄧ +##ㄨ +##ㆍ +##㈦ +##㊣ +##㗎 +##一 +##丁 +##七 +##万 +##丈 +##三 +##上 +##下 +##不 +##与 +##丐 +##丑 +##专 +##且 +##丕 +##世 +##丘 +##丙 +##业 +##丛 +##东 +##丝 +##丞 +##丟 +##両 +##丢 +##两 +##严 +##並 +##丧 +##丨 +##个 +##丫 +##中 +##丰 +##串 +##临 +##丶 +##丸 +##丹 +##为 +##主 +##丼 +##丽 +##举 +##丿 +##乂 +##乃 +##久 +##么 +##义 +##之 +##乌 +##乍 +##乎 +##乏 +##乐 +##乒 +##乓 +##乔 +##乖 +##乗 +##乘 +##乙 +##乜 +##九 +##乞 +##也 +##习 +##乡 +##书 +##乩 +##买 +##乱 +##乳 +##乾 +##亀 +##亂 +##了 +##予 +##争 +##事 +##二 +##于 +##亏 +##云 +##互 +##五 +##井 +##亘 +##亙 +##亚 +##些 +##亜 +##亞 +##亟 +##亡 +##亢 +##交 +##亥 +##亦 +##产 +##亨 +##亩 +##享 +##京 +##亭 +##亮 +##亲 +##亳 +##亵 +##人 +##亿 +##什 +##仁 +##仃 +##仄 +##仅 +##仆 +##仇 +##今 +##介 +##仍 +##从 +##仏 +##仑 +##仓 +##仔 +##仕 +##他 +##仗 +##付 +##仙 +##仝 +##仞 +##仟 +##代 +##令 +##以 +##仨 +##仪 +##们 +##仮 +##仰 +##仲 +##件 +##价 +##任 +##份 +##仿 +##企 +##伉 +##伊 +##伍 +##伎 +##伏 +##伐 +##休 +##伕 +##众 +##优 +##伙 +##会 +##伝 +##伞 +##伟 +##传 +##伢 +##伤 +##伦 +##伪 +##伫 +##伯 +##估 +##伴 +##伶 +##伸 +##伺 +##似 +##伽 +##佃 +##但 +##佇 +##佈 +##位 +##低 +##住 +##佐 +##佑 +##体 +##佔 +##何 +##佗 +##佘 +##余 +##佚 +##佛 +##作 +##佝 +##佞 +##佟 +##你 +##佢 +##佣 +##佤 +##佥 +##佩 +##佬 +##佯 +##佰 +##佳 +##併 +##佶 +##佻 +##佼 +##使 +##侃 +##侄 +##來 +##侈 +##例 +##侍 +##侏 +##侑 +##侖 +##侗 +##供 +##依 +##侠 +##価 +##侣 +##侥 +##侦 +##侧 +##侨 +##侬 +##侮 +##侯 +##侵 +##侶 +##侷 +##便 +##係 +##促 +##俄 +##俊 +##俎 +##俏 +##俐 +##俑 +##俗 +##俘 +##俚 +##保 +##俞 +##俟 +##俠 +##信 +##俨 +##俩 +##俪 +##俬 +##俭 +##修 +##俯 +##俱 +##俳 +##俸 +##俺 +##俾 +##倆 +##倉 +##個 +##倌 +##倍 +##倏 +##們 +##倒 +##倔 +##倖 +##倘 +##候 +##倚 +##倜 +##借 +##倡 +##値 +##倦 +##倩 +##倪 +##倫 +##倬 +##倭 +##倶 +##债 +##值 +##倾 +##偃 +##假 +##偈 +##偉 +##偌 +##偎 +##偏 +##偕 +##做 +##停 +##健 +##側 +##偵 +##偶 +##偷 +##偻 +##偽 +##偿 +##傀 +##傅 +##傍 +##傑 +##傘 +##備 +##傚 +##傢 +##傣 +##傥 +##储 +##傩 +##催 +##傭 +##傲 +##傳 +##債 +##傷 +##傻 +##傾 +##僅 +##働 +##像 +##僑 +##僕 +##僖 +##僚 +##僥 +##僧 +##僭 +##僮 +##僱 +##僵 +##價 +##僻 +##儀 +##儂 +##億 +##儆 +##儉 +##儋 +##儒 +##儕 +##儘 +##償 +##儡 +##優 +##儲 +##儷 +##儼 +##儿 +##兀 +##允 +##元 +##兄 +##充 +##兆 +##兇 +##先 +##光 +##克 +##兌 +##免 +##児 +##兑 +##兒 +##兔 +##兖 +##党 +##兜 +##兢 +##入 +##內 +##全 +##兩 +##八 +##公 +##六 +##兮 +##兰 +##共 +##兲 +##关 +##兴 +##兵 +##其 +##具 +##典 +##兹 +##养 +##兼 +##兽 +##冀 +##内 +##円 +##冇 +##冈 +##冉 +##冊 +##册 +##再 +##冏 +##冒 +##冕 +##冗 +##写 +##军 +##农 +##冠 +##冢 +##冤 +##冥 +##冨 +##冪 +##冬 +##冯 +##冰 +##冲 +##决 +##况 +##冶 +##冷 +##冻 +##冼 +##冽 +##冾 +##净 +##凄 +##准 +##凇 +##凈 +##凉 +##凋 +##凌 +##凍 +##减 +##凑 +##凛 +##凜 +##凝 +##几 +##凡 +##凤 +##処 +##凪 +##凭 +##凯 +##凰 +##凱 +##凳 +##凶 +##凸 +##凹 +##出 +##击 +##函 +##凿 +##刀 +##刁 +##刃 +##分 +##切 +##刈 +##刊 +##刍 +##刎 +##刑 +##划 +##列 +##刘 
+##则 +##刚 +##创 +##初 +##删 +##判 +##別 +##刨 +##利 +##刪 +##别 +##刮 +##到 +##制 +##刷 +##券 +##刹 +##刺 +##刻 +##刽 +##剁 +##剂 +##剃 +##則 +##剉 +##削 +##剋 +##剌 +##前 +##剎 +##剐 +##剑 +##剔 +##剖 +##剛 +##剜 +##剝 +##剣 +##剤 +##剥 +##剧 +##剩 +##剪 +##副 +##割 +##創 +##剷 +##剽 +##剿 +##劃 +##劇 +##劈 +##劉 +##劊 +##劍 +##劏 +##劑 +##力 +##劝 +##办 +##功 +##加 +##务 +##劣 +##动 +##助 +##努 +##劫 +##劭 +##励 +##劲 +##劳 +##労 +##劵 +##効 +##劾 +##势 +##勁 +##勃 +##勇 +##勉 +##勋 +##勐 +##勒 +##動 +##勖 +##勘 +##務 +##勛 +##勝 +##勞 +##募 +##勢 +##勤 +##勧 +##勳 +##勵 +##勸 +##勺 +##勻 +##勾 +##勿 +##匀 +##包 +##匆 +##匈 +##匍 +##匐 +##匕 +##化 +##北 +##匙 +##匝 +##匠 +##匡 +##匣 +##匪 +##匮 +##匯 +##匱 +##匹 +##区 +##医 +##匾 +##匿 +##區 +##十 +##千 +##卅 +##升 +##午 +##卉 +##半 +##卍 +##华 +##协 +##卑 +##卒 +##卓 +##協 +##单 +##卖 +##南 +##単 +##博 +##卜 +##卞 +##卟 +##占 +##卡 +##卢 +##卤 +##卦 +##卧 +##卫 +##卮 +##卯 +##印 +##危 +##即 +##却 +##卵 +##卷 +##卸 +##卻 +##卿 +##厂 +##厄 +##厅 +##历 +##厉 +##压 +##厌 +##厕 +##厘 +##厚 +##厝 +##原 +##厢 +##厥 +##厦 +##厨 +##厩 +##厭 +##厮 +##厲 +##厳 +##去 +##县 +##叁 +##参 +##參 +##又 +##叉 +##及 +##友 +##双 +##反 +##収 +##发 +##叔 +##取 +##受 +##变 +##叙 +##叛 +##叟 +##叠 +##叡 +##叢 +##口 +##古 +##句 +##另 +##叨 +##叩 +##只 +##叫 +##召 +##叭 +##叮 +##可 +##台 +##叱 +##史 +##右 +##叵 +##叶 +##号 +##司 +##叹 +##叻 +##叼 +##叽 +##吁 +##吃 +##各 +##吆 +##合 +##吉 +##吊 +##吋 +##同 +##名 +##后 +##吏 +##吐 +##向 +##吒 +##吓 +##吕 +##吖 +##吗 +##君 +##吝 +##吞 +##吟 +##吠 +##吡 +##否 +##吧 +##吨 +##吩 +##含 +##听 +##吭 +##吮 +##启 +##吱 +##吳 +##吴 +##吵 +##吶 +##吸 +##吹 +##吻 +##吼 +##吽 +##吾 +##呀 +##呂 +##呃 +##呆 +##呈 +##告 +##呋 +##呎 +##呐 +##呓 +##呕 +##呗 +##员 +##呛 +##呜 +##呢 +##呤 +##呦 +##周 +##呱 +##呲 +##味 +##呵 +##呷 +##呸 +##呻 +##呼 +##命 +##咀 +##咁 +##咂 +##咄 +##咆 +##咋 +##和 +##咎 +##咏 +##咐 +##咒 +##咔 +##咕 +##咖 +##咗 +##咘 +##咙 +##咚 +##咛 +##咣 +##咤 +##咦 +##咧 +##咨 +##咩 +##咪 +##咫 +##咬 +##咭 +##咯 +##咱 +##咲 +##咳 +##咸 +##咻 +##咽 +##咿 +##哀 +##品 +##哂 +##哄 +##哆 +##哇 +##哈 +##哉 +##哋 +##哌 +##响 +##哎 +##哏 +##哐 +##哑 +##哒 +##哔 +##哗 +##哟 +##員 +##哥 +##哦 +##哧 +##哨 +##哩 +##哪 +##哭 +##哮 +##哲 +##哺 +##哼 +##哽 +##唁 +##唄 +##唆 +##唇 +##唉 +##唏 +##唐 +##唑 +##唔 +##唠 +##唤 +##唧 +##唬 +##售 +##唯 +##唰 +##唱 +##唳 +##唷 +##唸 +##唾 +##啃 +##啄 +##商 +##啉 +##啊 +##問 +##啓 +##啕 +##啖 +##啜 +##啞 +##啟 +##啡 +##啤 +##啥 +##啦 +##啧 +##啪 +##啫 +##啬 +##啮 +##啰 +##啱 +##啲 +##啵 +##啶 +##啷 +##啸 +##啻 +##啼 +##啾 +##喀 +##喂 +##喃 +##善 +##喆 +##喇 +##喉 +##喊 +##喋 +##喎 +##喏 +##喔 +##喘 +##喙 +##喚 +##喜 +##喝 +##喟 +##喧 +##喪 +##喫 +##喬 +##單 +##喰 +##喱 +##喲 +##喳 +##喵 +##営 +##喷 +##喹 +##喺 +##喻 +##喽 +##嗅 +##嗆 +##嗇 +##嗎 +##嗑 +##嗒 +##嗓 +##嗔 +##嗖 +##嗚 +##嗜 +##嗝 +##嗟 +##嗡 +##嗣 +##嗤 +##嗦 +##嗨 +##嗪 +##嗬 +##嗯 +##嗰 +##嗲 +##嗳 +##嗶 +##嗷 +##嗽 +##嘀 +##嘅 +##嘆 +##嘈 +##嘉 +##嘌 +##嘍 +##嘎 +##嘔 +##嘖 +##嘗 +##嘘 +##嘚 +##嘛 +##嘜 +##嘞 +##嘟 +##嘢 +##嘣 +##嘤 +##嘧 +##嘩 +##嘭 +##嘮 +##嘯 +##嘰 +##嘱 +##嘲 +##嘴 +##嘶 +##嘸 +##嘹 +##嘻 +##嘿 +##噁 +##噌 +##噎 +##噓 +##噔 +##噗 +##噙 +##噜 +##噠 +##噢 +##噤 +##器 +##噩 +##噪 +##噬 +##噱 +##噴 +##噶 +##噸 +##噹 +##噻 +##噼 +##嚀 +##嚇 +##嚎 +##嚏 +##嚐 +##嚓 +##嚕 +##嚟 +##嚣 +##嚥 +##嚨 +##嚮 +##嚴 +##嚷 +##嚼 +##囂 +##囉 +##囊 +##囍 +##囑 +##囔 +##囗 +##囚 +##四 +##囝 +##回 +##囟 +##因 +##囡 +##团 +##団 +##囤 +##囧 +##囪 +##囫 +##园 +##困 +##囱 +##囲 +##図 +##围 +##囹 +##固 +##国 +##图 +##囿 +##圃 +##圄 +##圆 +##圈 +##國 +##圍 +##圏 +##園 +##圓 +##圖 +##團 +##圜 +##土 +##圣 +##圧 +##在 +##圩 +##圭 +##地 +##圳 +##场 +##圻 +##圾 +##址 +##坂 +##均 +##坊 +##坍 +##坎 +##坏 +##坐 +##坑 +##块 +##坚 +##坛 +##坝 +##坞 +##坟 +##坠 +##坡 +##坤 +##坦 +##坨 +##坪 +##坯 +##坳 +##坵 +##坷 +##垂 +##垃 +##垄 +##型 +##垒 +##垚 +##垛 +##垠 +##垢 +##垣 +##垦 +##垩 +##垫 +##垭 +##垮 +##垵 +##埂 +##埃 +##埋 +##城 +##埔 +##埕 +##埗 +##域 +##埠 +##埤 +##埵 +##執 +##埸 +##培 +##基 +##埼 +##堀 +##堂 +##堃 +##堅 +##堆 +##堇 +##堑 +##堕 +##堙 +##堡 +##堤 +##堪 +##堯 +##堰 +##報 +##場 +##堵 +##堺 +##堿 +##塊 +##塌 +##塑 +##塔 +##塗 +##塘 +##塚 +##塞 +##塢 +##塩 +##填 +##塬 +##塭 +##塵 +##塾 +##墀 +##境 +##墅 +##墉 +##墊 +##墒 
+##墓 +##増 +##墘 +##墙 +##墜 +##增 +##墟 +##墨 +##墩 +##墮 +##墳 +##墻 +##墾 +##壁 +##壅 +##壆 +##壇 +##壊 +##壑 +##壓 +##壕 +##壘 +##壞 +##壟 +##壢 +##壤 +##壩 +##士 +##壬 +##壮 +##壯 +##声 +##売 +##壳 +##壶 +##壹 +##壺 +##壽 +##处 +##备 +##変 +##复 +##夏 +##夔 +##夕 +##外 +##夙 +##多 +##夜 +##够 +##夠 +##夢 +##夥 +##大 +##天 +##太 +##夫 +##夭 +##央 +##夯 +##失 +##头 +##夷 +##夸 +##夹 +##夺 +##夾 +##奂 +##奄 +##奇 +##奈 +##奉 +##奋 +##奎 +##奏 +##奐 +##契 +##奔 +##奕 +##奖 +##套 +##奘 +##奚 +##奠 +##奢 +##奥 +##奧 +##奪 +##奬 +##奮 +##女 +##奴 +##奶 +##奸 +##她 +##好 +##如 +##妃 +##妄 +##妆 +##妇 +##妈 +##妊 +##妍 +##妒 +##妓 +##妖 +##妘 +##妙 +##妝 +##妞 +##妣 +##妤 +##妥 +##妨 +##妩 +##妪 +##妮 +##妲 +##妳 +##妹 +##妻 +##妾 +##姆 +##姉 +##姊 +##始 +##姍 +##姐 +##姑 +##姒 +##姓 +##委 +##姗 +##姚 +##姜 +##姝 +##姣 +##姥 +##姦 +##姨 +##姪 +##姫 +##姬 +##姹 +##姻 +##姿 +##威 +##娃 +##娄 +##娅 +##娆 +##娇 +##娉 +##娑 +##娓 +##娘 +##娛 +##娜 +##娟 +##娠 +##娣 +##娥 +##娩 +##娱 +##娲 +##娴 +##娶 +##娼 +##婀 +##婁 +##婆 +##婉 +##婊 +##婕 +##婚 +##婢 +##婦 +##婧 +##婪 +##婭 +##婴 +##婵 +##婶 +##婷 +##婺 +##婿 +##媒 +##媚 +##媛 +##媞 +##媧 +##媲 +##媳 +##媽 +##媾 +##嫁 +##嫂 +##嫉 +##嫌 +##嫑 +##嫔 +##嫖 +##嫘 +##嫚 +##嫡 +##嫣 +##嫦 +##嫩 +##嫲 +##嫵 +##嫻 +##嬅 +##嬉 +##嬌 +##嬗 +##嬛 +##嬢 +##嬤 +##嬪 +##嬰 +##嬴 +##嬷 +##嬸 +##嬿 +##孀 +##孃 +##子 +##孑 +##孔 +##孕 +##孖 +##字 +##存 +##孙 +##孚 +##孛 +##孜 +##孝 +##孟 +##孢 +##季 +##孤 +##学 +##孩 +##孪 +##孫 +##孬 +##孰 +##孱 +##孳 +##孵 +##學 +##孺 +##孽 +##孿 +##宁 +##它 +##宅 +##宇 +##守 +##安 +##宋 +##完 +##宏 +##宓 +##宕 +##宗 +##官 +##宙 +##定 +##宛 +##宜 +##宝 +##实 +##実 +##宠 +##审 +##客 +##宣 +##室 +##宥 +##宦 +##宪 +##宫 +##宮 +##宰 +##害 +##宴 +##宵 +##家 +##宸 +##容 +##宽 +##宾 +##宿 +##寂 +##寄 +##寅 +##密 +##寇 +##富 +##寐 +##寒 +##寓 +##寛 +##寝 +##寞 +##察 +##寡 +##寢 +##寥 +##實 +##寧 +##寨 +##審 +##寫 +##寬 +##寮 +##寰 +##寵 +##寶 +##寸 +##对 +##寺 +##寻 +##导 +##対 +##寿 +##封 +##専 +##射 +##将 +##將 +##專 +##尉 +##尊 +##尋 +##對 +##導 +##小 +##少 +##尔 +##尕 +##尖 +##尘 +##尚 +##尝 +##尤 +##尧 +##尬 +##就 +##尴 +##尷 +##尸 +##尹 +##尺 +##尻 +##尼 +##尽 +##尾 +##尿 +##局 +##屁 +##层 +##屄 +##居 +##屆 +##屈 +##屉 +##届 +##屋 +##屌 +##屍 +##屎 +##屏 +##屐 +##屑 +##展 +##屜 +##属 +##屠 +##屡 +##屢 +##層 +##履 +##屬 +##屯 +##山 +##屹 +##屿 +##岀 +##岁 +##岂 +##岌 +##岐 +##岑 +##岔 +##岖 +##岗 +##岘 +##岙 +##岚 +##岛 +##岡 +##岩 +##岫 +##岬 +##岭 +##岱 +##岳 +##岷 +##岸 +##峇 +##峋 +##峒 +##峙 +##峡 +##峤 +##峥 +##峦 +##峨 +##峪 +##峭 +##峯 +##峰 +##峴 +##島 +##峻 +##峽 +##崁 +##崂 +##崆 +##崇 +##崎 +##崑 +##崔 +##崖 +##崗 +##崙 +##崛 +##崧 +##崩 +##崭 +##崴 +##崽 +##嵇 +##嵊 +##嵋 +##嵌 +##嵐 +##嵘 +##嵩 +##嵬 +##嵯 +##嶂 +##嶄 +##嶇 +##嶋 +##嶙 +##嶺 +##嶼 +##嶽 +##巅 +##巍 +##巒 +##巔 +##巖 +##川 +##州 +##巡 +##巢 +##工 +##左 +##巧 +##巨 +##巩 +##巫 +##差 +##己 +##已 +##巳 +##巴 +##巷 +##巻 +##巽 +##巾 +##巿 +##币 +##市 +##布 +##帅 +##帆 +##师 +##希 +##帐 +##帑 +##帕 +##帖 +##帘 +##帚 +##帛 +##帜 +##帝 +##帥 +##带 +##帧 +##師 +##席 +##帮 +##帯 +##帰 +##帳 +##帶 +##帷 +##常 +##帼 +##帽 +##幀 +##幂 +##幄 +##幅 +##幌 +##幔 +##幕 +##幟 +##幡 +##幢 +##幣 +##幫 +##干 +##平 +##年 +##并 +##幸 +##幹 +##幺 +##幻 +##幼 +##幽 +##幾 +##广 +##庁 +##広 +##庄 +##庆 +##庇 +##床 +##序 +##庐 +##库 +##应 +##底 +##庖 +##店 +##庙 +##庚 +##府 +##庞 +##废 +##庠 +##度 +##座 +##庫 +##庭 +##庵 +##庶 +##康 +##庸 +##庹 +##庾 +##廁 +##廂 +##廃 +##廈 +##廉 +##廊 +##廓 +##廖 +##廚 +##廝 +##廟 +##廠 +##廢 +##廣 +##廬 +##廳 +##延 +##廷 +##建 +##廿 +##开 +##弁 +##异 +##弃 +##弄 +##弈 +##弊 +##弋 +##式 +##弑 +##弒 +##弓 +##弔 +##引 +##弗 +##弘 +##弛 +##弟 +##张 +##弥 +##弦 +##弧 +##弩 +##弭 +##弯 +##弱 +##張 +##強 +##弹 +##强 +##弼 +##弾 +##彅 +##彆 +##彈 +##彌 +##彎 +##归 +##当 +##录 +##彗 +##彙 +##彝 +##形 +##彤 +##彥 +##彦 +##彧 +##彩 +##彪 +##彫 +##彬 +##彭 +##彰 +##影 +##彷 +##役 +##彻 +##彼 +##彿 +##往 +##征 +##径 +##待 +##徇 +##很 +##徉 +##徊 +##律 +##後 +##徐 +##徑 +##徒 +##従 +##徕 +##得 +##徘 +##徙 +##徜 +##從 +##徠 +##御 +##徨 +##復 +##循 +##徬 +##微 +##徳 +##徴 +##徵 +##德 +##徹 +##徼 +##徽 +##心 +##必 +##忆 +##忌 +##忍 +##忏 +##忐 +##忑 +##忒 +##忖 +##志 +##忘 +##忙 +##応 +##忠 +##忡 +##忤 +##忧 +##忪 +##快 +##忱 +##念 +##忻 +##忽 +##忿 +##怀 
+##态 +##怂 +##怅 +##怆 +##怎 +##怏 +##怒 +##怔 +##怕 +##怖 +##怙 +##怜 +##思 +##怠 +##怡 +##急 +##怦 +##性 +##怨 +##怪 +##怯 +##怵 +##总 +##怼 +##恁 +##恃 +##恆 +##恋 +##恍 +##恐 +##恒 +##恕 +##恙 +##恚 +##恢 +##恣 +##恤 +##恥 +##恨 +##恩 +##恪 +##恫 +##恬 +##恭 +##息 +##恰 +##恳 +##恵 +##恶 +##恸 +##恺 +##恻 +##恼 +##恿 +##悄 +##悅 +##悉 +##悌 +##悍 +##悔 +##悖 +##悚 +##悟 +##悠 +##患 +##悦 +##您 +##悩 +##悪 +##悬 +##悯 +##悱 +##悲 +##悴 +##悵 +##悶 +##悸 +##悻 +##悼 +##悽 +##情 +##惆 +##惇 +##惊 +##惋 +##惑 +##惕 +##惘 +##惚 +##惜 +##惟 +##惠 +##惡 +##惦 +##惧 +##惨 +##惩 +##惫 +##惬 +##惭 +##惮 +##惯 +##惰 +##惱 +##想 +##惴 +##惶 +##惹 +##惺 +##愁 +##愆 +##愈 +##愉 +##愍 +##意 +##愕 +##愚 +##愛 +##愜 +##感 +##愣 +##愤 +##愧 +##愫 +##愷 +##愿 +##慄 +##慈 +##態 +##慌 +##慎 +##慑 +##慕 +##慘 +##慚 +##慟 +##慢 +##慣 +##慧 +##慨 +##慫 +##慮 +##慰 +##慳 +##慵 +##慶 +##慷 +##慾 +##憂 +##憊 +##憋 +##憎 +##憐 +##憑 +##憔 +##憚 +##憤 +##憧 +##憨 +##憩 +##憫 +##憬 +##憲 +##憶 +##憾 +##懂 +##懇 +##懈 +##應 +##懊 +##懋 +##懑 +##懒 +##懦 +##懲 +##懵 +##懶 +##懷 +##懸 +##懺 +##懼 +##懾 +##懿 +##戀 +##戈 +##戊 +##戌 +##戍 +##戎 +##戏 +##成 +##我 +##戒 +##戕 +##或 +##战 +##戚 +##戛 +##戟 +##戡 +##戦 +##截 +##戬 +##戮 +##戰 +##戲 +##戳 +##戴 +##戶 +##户 +##戸 +##戻 +##戾 +##房 +##所 +##扁 +##扇 +##扈 +##扉 +##手 +##才 +##扎 +##扑 +##扒 +##打 +##扔 +##払 +##托 +##扛 +##扣 +##扦 +##执 +##扩 +##扪 +##扫 +##扬 +##扭 +##扮 +##扯 +##扰 +##扱 +##扳 +##扶 +##批 +##扼 +##找 +##承 +##技 +##抄 +##抉 +##把 +##抑 +##抒 +##抓 +##投 +##抖 +##抗 +##折 +##抚 +##抛 +##抜 +##択 +##抟 +##抠 +##抡 +##抢 +##护 +##报 +##抨 +##披 +##抬 +##抱 +##抵 +##抹 +##押 +##抽 +##抿 +##拂 +##拄 +##担 +##拆 +##拇 +##拈 +##拉 +##拋 +##拌 +##拍 +##拎 +##拐 +##拒 +##拓 +##拔 +##拖 +##拗 +##拘 +##拙 +##拚 +##招 +##拜 +##拟 +##拡 +##拢 +##拣 +##拥 +##拦 +##拧 +##拨 +##择 +##括 +##拭 +##拮 +##拯 +##拱 +##拳 +##拴 +##拷 +##拼 +##拽 +##拾 +##拿 +##持 +##挂 +##指 +##挈 +##按 +##挎 +##挑 +##挖 +##挙 +##挚 +##挛 +##挝 +##挞 +##挟 +##挠 +##挡 +##挣 +##挤 +##挥 +##挨 +##挪 +##挫 +##振 +##挲 +##挹 +##挺 +##挽 +##挾 +##捂 +##捅 +##捆 +##捉 +##捋 +##捌 +##捍 +##捎 +##捏 +##捐 +##捕 +##捞 +##损 +##捡 +##换 +##捣 +##捧 +##捨 +##捩 +##据 +##捱 +##捲 +##捶 +##捷 +##捺 +##捻 +##掀 +##掂 +##掃 +##掇 +##授 +##掉 +##掌 +##掏 +##掐 +##排 +##掖 +##掘 +##掙 +##掛 +##掠 +##採 +##探 +##掣 +##接 +##控 +##推 +##掩 +##措 +##掬 +##掰 +##掲 +##掳 +##掴 +##掷 +##掸 +##掺 +##揀 +##揃 +##揄 +##揆 +##揉 +##揍 +##描 +##提 +##插 +##揖 +##揚 +##換 +##握 +##揣 +##揩 +##揪 +##揭 +##揮 +##援 +##揶 +##揸 +##揹 +##揽 +##搀 +##搁 +##搂 +##搅 +##損 +##搏 +##搐 +##搓 +##搔 +##搖 +##搗 +##搜 +##搞 +##搡 +##搪 +##搬 +##搭 +##搵 +##搶 +##携 +##搽 +##摀 +##摁 +##摄 +##摆 +##摇 +##摈 +##摊 +##摒 +##摔 +##摘 +##摞 +##摟 +##摧 +##摩 +##摯 +##摳 +##摸 +##摹 +##摺 +##摻 +##撂 +##撃 +##撅 +##撇 +##撈 +##撐 +##撑 +##撒 +##撓 +##撕 +##撚 +##撞 +##撤 +##撥 +##撩 +##撫 +##撬 +##播 +##撮 +##撰 +##撲 +##撵 +##撷 +##撸 +##撻 +##撼 +##撿 +##擀 +##擁 +##擂 +##擄 +##擅 +##擇 +##擊 +##擋 +##操 +##擎 +##擒 +##擔 +##擘 +##據 +##擞 +##擠 +##擡 +##擢 +##擦 +##擬 +##擰 +##擱 +##擲 +##擴 +##擷 +##擺 +##擼 +##擾 +##攀 +##攏 +##攒 +##攔 +##攘 +##攙 +##攜 +##攝 +##攞 +##攢 +##攣 +##攤 +##攥 +##攪 +##攫 +##攬 +##支 +##收 +##攸 +##改 +##攻 +##放 +##政 +##故 +##效 +##敌 +##敍 +##敎 +##敏 +##救 +##敕 +##敖 +##敗 +##敘 +##教 +##敛 +##敝 +##敞 +##敢 +##散 +##敦 +##敬 +##数 +##敲 +##整 +##敵 +##敷 +##數 +##斂 +##斃 +##文 +##斋 +##斌 +##斎 +##斐 +##斑 +##斓 +##斗 +##料 +##斛 +##斜 +##斟 +##斡 +##斤 +##斥 +##斧 +##斩 +##斫 +##斬 +##断 +##斯 +##新 +##斷 +##方 +##於 +##施 +##旁 +##旃 +##旅 +##旋 +##旌 +##旎 +##族 +##旖 +##旗 +##无 +##既 +##日 +##旦 +##旧 +##旨 +##早 +##旬 +##旭 +##旮 +##旱 +##时 +##旷 +##旺 +##旻 +##昀 +##昂 +##昆 +##昇 +##昉 +##昊 +##昌 +##明 +##昏 +##易 +##昔 +##昕 +##昙 +##星 +##映 +##春 +##昧 +##昨 +##昭 +##是 +##昱 +##昴 +##昵 +##昶 +##昼 +##显 +##晁 +##時 +##晃 +##晉 +##晋 +##晌 +##晏 +##晒 +##晓 +##晔 +##晕 +##晖 +##晗 +##晚 +##晝 +##晞 +##晟 +##晤 +##晦 +##晨 +##晩 +##普 +##景 +##晰 +##晴 +##晶 +##晷 +##智 +##晾 +##暂 +##暄 +##暇 +##暈 +##暉 +##暌 +##暐 +##暑 +##暖 +##暗 +##暝 +##暢 +##暧 +##暨 +##暫 +##暮 +##暱 +##暴 +##暸 +##暹 +##曄 +##曆 +##曇 +##曉 +##曖 +##曙 +##曜 +##曝 +##曠 +##曦 +##曬 +##曰 
+##曲 +##曳 +##更 +##書 +##曹 +##曼 +##曾 +##替 +##最 +##會 +##月 +##有 +##朋 +##服 +##朐 +##朔 +##朕 +##朗 +##望 +##朝 +##期 +##朦 +##朧 +##木 +##未 +##末 +##本 +##札 +##朮 +##术 +##朱 +##朴 +##朵 +##机 +##朽 +##杀 +##杂 +##权 +##杆 +##杈 +##杉 +##李 +##杏 +##材 +##村 +##杓 +##杖 +##杜 +##杞 +##束 +##杠 +##条 +##来 +##杨 +##杭 +##杯 +##杰 +##東 +##杳 +##杵 +##杷 +##杼 +##松 +##板 +##极 +##构 +##枇 +##枉 +##枋 +##析 +##枕 +##林 +##枚 +##果 +##枝 +##枢 +##枣 +##枪 +##枫 +##枭 +##枯 +##枰 +##枱 +##枳 +##架 +##枷 +##枸 +##柄 +##柏 +##某 +##柑 +##柒 +##染 +##柔 +##柘 +##柚 +##柜 +##柞 +##柠 +##柢 +##查 +##柩 +##柬 +##柯 +##柱 +##柳 +##柴 +##柵 +##査 +##柿 +##栀 +##栃 +##栄 +##栅 +##标 +##栈 +##栉 +##栋 +##栎 +##栏 +##树 +##栓 +##栖 +##栗 +##校 +##栩 +##株 +##样 +##核 +##根 +##格 +##栽 +##栾 +##桀 +##桁 +##桂 +##桃 +##桅 +##框 +##案 +##桉 +##桌 +##桎 +##桐 +##桑 +##桓 +##桔 +##桜 +##桠 +##桡 +##桢 +##档 +##桥 +##桦 +##桧 +##桨 +##桩 +##桶 +##桿 +##梁 +##梅 +##梆 +##梏 +##梓 +##梗 +##條 +##梟 +##梢 +##梦 +##梧 +##梨 +##梭 +##梯 +##械 +##梳 +##梵 +##梶 +##检 +##棂 +##棄 +##棉 +##棋 +##棍 +##棒 +##棕 +##棗 +##棘 +##棚 +##棟 +##棠 +##棣 +##棧 +##森 +##棱 +##棲 +##棵 +##棹 +##棺 +##椁 +##椅 +##椋 +##植 +##椎 +##椒 +##検 +##椪 +##椭 +##椰 +##椹 +##椽 +##椿 +##楂 +##楊 +##楓 +##楔 +##楚 +##楝 +##楞 +##楠 +##楣 +##楨 +##楫 +##業 +##楮 +##極 +##楷 +##楸 +##楹 +##楼 +##楽 +##概 +##榄 +##榆 +##榈 +##榉 +##榔 +##榕 +##榖 +##榛 +##榜 +##榨 +##榫 +##榭 +##榮 +##榱 +##榴 +##榷 +##榻 +##槁 +##槃 +##構 +##槌 +##槍 +##槎 +##槐 +##槓 +##様 +##槛 +##槟 +##槤 +##槭 +##槲 +##槳 +##槻 +##槽 +##槿 +##樁 +##樂 +##樊 +##樑 +##樓 +##標 +##樞 +##樟 +##模 +##樣 +##権 +##横 +##樫 +##樯 +##樱 +##樵 +##樸 +##樹 +##樺 +##樽 +##樾 +##橄 +##橇 +##橋 +##橐 +##橘 +##橙 +##機 +##橡 +##橢 +##橫 +##橱 +##橹 +##橼 +##檀 +##檄 +##檎 +##檐 +##檔 +##檗 +##檜 +##檢 +##檬 +##檯 +##檳 +##檸 +##檻 +##櫃 +##櫚 +##櫛 +##櫥 +##櫸 +##櫻 +##欄 +##權 +##欒 +##欖 +##欠 +##次 +##欢 +##欣 +##欧 +##欲 +##欸 +##欺 +##欽 +##款 +##歆 +##歇 +##歉 +##歌 +##歎 +##歐 +##歓 +##歙 +##歛 +##歡 +##止 +##正 +##此 +##步 +##武 +##歧 +##歩 +##歪 +##歯 +##歲 +##歳 +##歴 +##歷 +##歸 +##歹 +##死 +##歼 +##殁 +##殃 +##殆 +##殇 +##殉 +##殊 +##残 +##殒 +##殓 +##殖 +##殘 +##殞 +##殡 +##殤 +##殭 +##殯 +##殲 +##殴 +##段 +##殷 +##殺 +##殼 +##殿 +##毀 +##毁 +##毂 +##毅 +##毆 +##毋 +##母 +##毎 +##每 +##毒 +##毓 +##比 +##毕 +##毗 +##毘 +##毙 +##毛 +##毡 +##毫 +##毯 +##毽 +##氈 +##氏 +##氐 +##民 +##氓 +##气 +##氖 +##気 +##氙 +##氛 +##氟 +##氡 +##氢 +##氣 +##氤 +##氦 +##氧 +##氨 +##氪 +##氫 +##氮 +##氯 +##氰 +##氲 +##水 +##氷 +##永 +##氹 +##氾 +##汀 +##汁 +##求 +##汆 +##汇 +##汉 +##汎 +##汐 +##汕 +##汗 +##汙 +##汛 +##汝 +##汞 +##江 +##池 +##污 +##汤 +##汨 +##汩 +##汪 +##汰 +##汲 +##汴 +##汶 +##汹 +##決 +##汽 +##汾 +##沁 +##沂 +##沃 +##沅 +##沈 +##沉 +##沌 +##沏 +##沐 +##沒 +##沓 +##沖 +##沙 +##沛 +##沟 +##没 +##沢 +##沣 +##沥 +##沦 +##沧 +##沪 +##沫 +##沭 +##沮 +##沱 +##河 +##沸 +##油 +##治 +##沼 +##沽 +##沾 +##沿 +##況 +##泄 +##泉 +##泊 +##泌 +##泓 +##法 +##泗 +##泛 +##泞 +##泠 +##泡 +##波 +##泣 +##泥 +##注 +##泪 +##泫 +##泮 +##泯 +##泰 +##泱 +##泳 +##泵 +##泷 +##泸 +##泻 +##泼 +##泽 +##泾 +##洁 +##洄 +##洋 +##洒 +##洗 +##洙 +##洛 +##洞 +##津 +##洩 +##洪 +##洮 +##洱 +##洲 +##洵 +##洶 +##洸 +##洹 +##活 +##洼 +##洽 +##派 +##流 +##浃 +##浄 +##浅 +##浆 +##浇 +##浊 +##测 +##济 +##浏 +##浑 +##浒 +##浓 +##浔 +##浙 +##浚 +##浜 +##浣 +##浦 +##浩 +##浪 +##浬 +##浮 +##浯 +##浴 +##海 +##浸 +##涂 +##涅 +##涇 +##消 +##涉 +##涌 +##涎 +##涓 +##涔 +##涕 +##涙 +##涛 +##涝 +##涞 +##涟 +##涠 +##涡 +##涣 +##涤 +##润 +##涧 +##涨 +##涩 +##涪 +##涮 +##涯 +##液 +##涵 +##涸 +##涼 +##涿 +##淀 +##淄 +##淅 +##淆 +##淇 +##淋 +##淌 +##淑 +##淒 +##淖 +##淘 +##淙 +##淚 +##淞 +##淡 +##淤 +##淦 +##淨 +##淩 +##淪 +##淫 +##淬 +##淮 +##深 +##淳 +##淵 +##混 +##淹 +##淺 +##添 +##淼 +##清 +##済 +##渉 +##渊 +##渋 +##渍 +##渎 +##渐 +##渔 +##渗 +##渙 +##渚 +##減 +##渝 +##渠 +##渡 +##渣 +##渤 +##渥 +##渦 +##温 +##測 +##渭 +##港 +##渲 +##渴 +##游 +##渺 +##渾 +##湃 +##湄 +##湊 +##湍 +##湖 +##湘 +##湛 +##湟 +##湧 +##湫 +##湮 +##湯 +##湳 +##湾 +##湿 +##満 +##溃 +##溅 +##溉 +##溏 +##源 +##準 +##溜 +##溝 +##溟 +##溢 +##溥 +##溧 +##溪 +##溫 +##溯 +##溱 +##溴 +##溶 +##溺 +##溼 +##滁 +##滂 +##滄 +##滅 +##滇 +##滋 +##滌 +##滑 +##滓 
+##滔 +##滕 +##滙 +##滚 +##滝 +##滞 +##滟 +##满 +##滢 +##滤 +##滥 +##滦 +##滨 +##滩 +##滬 +##滯 +##滲 +##滴 +##滷 +##滸 +##滾 +##滿 +##漁 +##漂 +##漆 +##漉 +##漏 +##漓 +##演 +##漕 +##漠 +##漢 +##漣 +##漩 +##漪 +##漫 +##漬 +##漯 +##漱 +##漲 +##漳 +##漸 +##漾 +##漿 +##潆 +##潇 +##潋 +##潍 +##潑 +##潔 +##潘 +##潛 +##潜 +##潞 +##潟 +##潢 +##潤 +##潦 +##潧 +##潭 +##潮 +##潰 +##潴 +##潸 +##潺 +##潼 +##澀 +##澄 +##澆 +##澈 +##澍 +##澎 +##澗 +##澜 +##澡 +##澤 +##澧 +##澱 +##澳 +##澹 +##激 +##濁 +##濂 +##濃 +##濑 +##濒 +##濕 +##濘 +##濛 +##濟 +##濠 +##濡 +##濤 +##濫 +##濬 +##濮 +##濯 +##濱 +##濺 +##濾 +##瀅 +##瀆 +##瀉 +##瀋 +##瀏 +##瀑 +##瀕 +##瀘 +##瀚 +##瀛 +##瀝 +##瀞 +##瀟 +##瀧 +##瀨 +##瀬 +##瀰 +##瀾 +##灌 +##灏 +##灑 +##灘 +##灝 +##灞 +##灣 +##火 +##灬 +##灭 +##灯 +##灰 +##灵 +##灶 +##灸 +##灼 +##災 +##灾 +##灿 +##炀 +##炁 +##炅 +##炉 +##炊 +##炎 +##炒 +##炔 +##炕 +##炖 +##炙 +##炜 +##炫 +##炬 +##炭 +##炮 +##炯 +##炳 +##炷 +##炸 +##点 +##為 +##炼 +##炽 +##烁 +##烂 +##烃 +##烈 +##烊 +##烏 +##烘 +##烙 +##烛 +##烟 +##烤 +##烦 +##烧 +##烨 +##烩 +##烫 +##烬 +##热 +##烯 +##烷 +##烹 +##烽 +##焉 +##焊 +##焕 +##焖 +##焗 +##焘 +##焙 +##焚 +##焜 +##無 +##焦 +##焯 +##焰 +##焱 +##然 +##焼 +##煅 +##煉 +##煊 +##煌 +##煎 +##煒 +##煖 +##煙 +##煜 +##煞 +##煤 +##煥 +##煦 +##照 +##煨 +##煩 +##煮 +##煲 +##煸 +##煽 +##熄 +##熊 +##熏 +##熒 +##熔 +##熙 +##熟 +##熠 +##熨 +##熬 +##熱 +##熵 +##熹 +##熾 +##燁 +##燃 +##燄 +##燈 +##燉 +##燊 +##燎 +##燒 +##燔 +##燕 +##燙 +##燜 +##營 +##燥 +##燦 +##燧 +##燭 +##燮 +##燴 +##燻 +##燼 +##燿 +##爆 +##爍 +##爐 +##爛 +##爪 +##爬 +##爭 +##爰 +##爱 +##爲 +##爵 +##父 +##爷 +##爸 +##爹 +##爺 +##爻 +##爽 +##爾 +##牆 +##片 +##版 +##牌 +##牍 +##牒 +##牙 +##牛 +##牝 +##牟 +##牠 +##牡 +##牢 +##牦 +##牧 +##物 +##牯 +##牲 +##牴 +##牵 +##特 +##牺 +##牽 +##犀 +##犁 +##犄 +##犊 +##犍 +##犒 +##犢 +##犧 +##犬 +##犯 +##状 +##犷 +##犸 +##犹 +##狀 +##狂 +##狄 +##狈 +##狎 +##狐 +##狒 +##狗 +##狙 +##狞 +##狠 +##狡 +##狩 +##独 +##狭 +##狮 +##狰 +##狱 +##狸 +##狹 +##狼 +##狽 +##猎 +##猕 +##猖 +##猗 +##猙 +##猛 +##猜 +##猝 +##猥 +##猩 +##猪 +##猫 +##猬 +##献 +##猴 +##猶 +##猷 +##猾 +##猿 +##獄 +##獅 +##獎 +##獐 +##獒 +##獗 +##獠 +##獣 +##獨 +##獭 +##獰 +##獲 +##獵 +##獷 +##獸 +##獺 +##獻 +##獼 +##獾 +##玄 +##率 +##玉 +##王 +##玑 +##玖 +##玛 +##玟 +##玠 +##玥 +##玩 +##玫 +##玮 +##环 +##现 +##玲 +##玳 +##玷 +##玺 +##玻 +##珀 +##珂 +##珅 +##珈 +##珉 +##珊 +##珍 +##珏 +##珐 +##珑 +##珙 +##珞 +##珠 +##珣 +##珥 +##珩 +##珪 +##班 +##珮 +##珲 +##珺 +##現 +##球 +##琅 +##理 +##琇 +##琉 +##琊 +##琍 +##琏 +##琐 +##琛 +##琢 +##琥 +##琦 +##琨 +##琪 +##琬 +##琮 +##琰 +##琲 +##琳 +##琴 +##琵 +##琶 +##琺 +##琼 +##瑀 +##瑁 +##瑄 +##瑋 +##瑕 +##瑗 +##瑙 +##瑚 +##瑛 +##瑜 +##瑞 +##瑟 +##瑠 +##瑣 +##瑤 +##瑩 +##瑪 +##瑯 +##瑰 +##瑶 +##瑾 +##璀 +##璁 +##璃 +##璇 +##璉 +##璋 +##璎 +##璐 +##璜 +##璞 +##璟 +##璧 +##璨 +##環 +##璽 +##璿 +##瓊 +##瓏 +##瓒 +##瓜 +##瓢 +##瓣 +##瓤 +##瓦 +##瓮 +##瓯 +##瓴 +##瓶 +##瓷 +##甄 +##甌 +##甕 +##甘 +##甙 +##甚 +##甜 +##生 +##產 +##産 +##甥 +##甦 +##用 +##甩 +##甫 +##甬 +##甭 +##甯 +##田 +##由 +##甲 +##申 +##电 +##男 +##甸 +##町 +##画 +##甾 +##畀 +##畅 +##界 +##畏 +##畑 +##畔 +##留 +##畜 +##畝 +##畢 +##略 +##畦 +##番 +##畫 +##異 +##畲 +##畳 +##畴 +##當 +##畸 +##畹 +##畿 +##疆 +##疇 +##疊 +##疏 +##疑 +##疔 +##疖 +##疗 +##疙 +##疚 +##疝 +##疟 +##疡 +##疣 +##疤 +##疥 +##疫 +##疮 +##疯 +##疱 +##疲 +##疳 +##疵 +##疸 +##疹 +##疼 +##疽 +##疾 +##痂 +##病 +##症 +##痈 +##痉 +##痊 +##痍 +##痒 +##痔 +##痕 +##痘 +##痙 +##痛 +##痞 +##痠 +##痢 +##痣 +##痤 +##痧 +##痨 +##痪 +##痫 +##痰 +##痱 +##痴 +##痹 +##痺 +##痼 +##痿 +##瘀 +##瘁 +##瘋 +##瘍 +##瘓 +##瘘 +##瘙 +##瘟 +##瘠 +##瘡 +##瘢 +##瘤 +##瘦 +##瘧 +##瘩 +##瘪 +##瘫 +##瘴 +##瘸 +##瘾 +##療 +##癇 +##癌 +##癒 +##癖 +##癜 +##癞 +##癡 +##癢 +##癣 +##癥 +##癫 +##癬 +##癮 +##癱 +##癲 +##癸 +##発 +##登 +##發 +##白 +##百 +##皂 +##的 +##皆 +##皇 +##皈 +##皋 +##皎 +##皑 +##皓 +##皖 +##皙 +##皚 +##皮 +##皰 +##皱 +##皴 +##皺 +##皿 +##盂 +##盃 +##盅 +##盆 +##盈 +##益 +##盎 +##盏 +##盐 +##监 +##盒 +##盔 +##盖 +##盗 +##盘 +##盛 +##盜 +##盞 +##盟 +##盡 +##監 +##盤 +##盥 +##盧 +##盪 +##目 +##盯 +##盱 +##盲 +##直 +##相 +##盹 +##盼 +##盾 +##省 +##眈 +##眉 +##看 +##県 +##眙 +##眞 +##真 +##眠 +##眦 +##眨 +##眩 +##眯 +##眶 +##眷 +##眸 +##眺 +##眼 +##眾 +##着 +##睁 +##睇 
+##睏 +##睐 +##睑 +##睛 +##睜 +##睞 +##睡 +##睢 +##督 +##睥 +##睦 +##睨 +##睪 +##睫 +##睬 +##睹 +##睽 +##睾 +##睿 +##瞄 +##瞅 +##瞇 +##瞋 +##瞌 +##瞎 +##瞑 +##瞒 +##瞓 +##瞞 +##瞟 +##瞠 +##瞥 +##瞧 +##瞩 +##瞪 +##瞬 +##瞭 +##瞰 +##瞳 +##瞻 +##瞼 +##瞿 +##矇 +##矍 +##矗 +##矚 +##矛 +##矜 +##矢 +##矣 +##知 +##矩 +##矫 +##短 +##矮 +##矯 +##石 +##矶 +##矽 +##矾 +##矿 +##码 +##砂 +##砌 +##砍 +##砒 +##研 +##砖 +##砗 +##砚 +##砝 +##砣 +##砥 +##砧 +##砭 +##砰 +##砲 +##破 +##砷 +##砸 +##砺 +##砼 +##砾 +##础 +##硅 +##硐 +##硒 +##硕 +##硝 +##硫 +##硬 +##确 +##硯 +##硼 +##碁 +##碇 +##碉 +##碌 +##碍 +##碎 +##碑 +##碓 +##碗 +##碘 +##碚 +##碛 +##碟 +##碣 +##碧 +##碩 +##碰 +##碱 +##碳 +##碴 +##確 +##碼 +##碾 +##磁 +##磅 +##磊 +##磋 +##磐 +##磕 +##磚 +##磡 +##磨 +##磬 +##磯 +##磲 +##磷 +##磺 +##礁 +##礎 +##礙 +##礡 +##礦 +##礪 +##礫 +##礴 +##示 +##礼 +##社 +##祀 +##祁 +##祂 +##祇 +##祈 +##祉 +##祎 +##祐 +##祕 +##祖 +##祗 +##祚 +##祛 +##祜 +##祝 +##神 +##祟 +##祠 +##祢 +##祥 +##票 +##祭 +##祯 +##祷 +##祸 +##祺 +##祿 +##禀 +##禁 +##禄 +##禅 +##禍 +##禎 +##福 +##禛 +##禦 +##禧 +##禪 +##禮 +##禱 +##禹 +##禺 +##离 +##禽 +##禾 +##禿 +##秀 +##私 +##秃 +##秆 +##秉 +##秋 +##种 +##科 +##秒 +##秘 +##租 +##秣 +##秤 +##秦 +##秧 +##秩 +##秭 +##积 +##称 +##秸 +##移 +##秽 +##稀 +##稅 +##程 +##稍 +##税 +##稔 +##稗 +##稚 +##稜 +##稞 +##稟 +##稠 +##稣 +##種 +##稱 +##稲 +##稳 +##稷 +##稹 +##稻 +##稼 +##稽 +##稿 +##穀 +##穂 +##穆 +##穌 +##積 +##穎 +##穗 +##穢 +##穩 +##穫 +##穴 +##究 +##穷 +##穹 +##空 +##穿 +##突 +##窃 +##窄 +##窈 +##窍 +##窑 +##窒 +##窓 +##窕 +##窖 +##窗 +##窘 +##窜 +##窝 +##窟 +##窠 +##窥 +##窦 +##窨 +##窩 +##窪 +##窮 +##窯 +##窺 +##窿 +##竄 +##竅 +##竇 +##竊 +##立 +##竖 +##站 +##竜 +##竞 +##竟 +##章 +##竣 +##童 +##竭 +##端 +##競 +##竹 +##竺 +##竽 +##竿 +##笃 +##笆 +##笈 +##笋 +##笏 +##笑 +##笔 +##笙 +##笛 +##笞 +##笠 +##符 +##笨 +##第 +##笹 +##笺 +##笼 +##筆 +##等 +##筊 +##筋 +##筍 +##筏 +##筐 +##筑 +##筒 +##答 +##策 +##筛 +##筝 +##筠 +##筱 +##筲 +##筵 +##筷 +##筹 +##签 +##简 +##箇 +##箋 +##箍 +##箏 +##箐 +##箔 +##箕 +##算 +##箝 +##管 +##箩 +##箫 +##箭 +##箱 +##箴 +##箸 +##節 +##篁 +##範 +##篆 +##篇 +##築 +##篑 +##篓 +##篙 +##篝 +##篠 +##篡 +##篤 +##篩 +##篪 +##篮 +##篱 +##篷 +##簇 +##簌 +##簍 +##簡 +##簦 +##簧 +##簪 +##簫 +##簷 +##簸 +##簽 +##簾 +##簿 +##籁 +##籃 +##籌 +##籍 +##籐 +##籟 +##籠 +##籤 +##籬 +##籮 +##籲 +##米 +##类 +##籼 +##籽 +##粄 +##粉 +##粑 +##粒 +##粕 +##粗 +##粘 +##粟 +##粤 +##粥 +##粧 +##粪 +##粮 +##粱 +##粲 +##粳 +##粵 +##粹 +##粼 +##粽 +##精 +##粿 +##糅 +##糊 +##糍 +##糕 +##糖 +##糗 +##糙 +##糜 +##糞 +##糟 +##糠 +##糧 +##糬 +##糯 +##糰 +##糸 +##系 +##糾 +##紀 +##紂 +##約 +##紅 +##紉 +##紊 +##紋 +##納 +##紐 +##紓 +##純 +##紗 +##紘 +##紙 +##級 +##紛 +##紜 +##素 +##紡 +##索 +##紧 +##紫 +##紮 +##累 +##細 +##紳 +##紹 +##紺 +##終 +##絃 +##組 +##絆 +##経 +##結 +##絕 +##絞 +##絡 +##絢 +##給 +##絨 +##絮 +##統 +##絲 +##絳 +##絵 +##絶 +##絹 +##綁 +##綏 +##綑 +##經 +##継 +##続 +##綜 +##綠 +##綢 +##綦 +##綫 +##綬 +##維 +##綱 +##網 +##綴 +##綵 +##綸 +##綺 +##綻 +##綽 +##綾 +##綿 +##緊 +##緋 +##総 +##緑 +##緒 +##緘 +##線 +##緝 +##緞 +##締 +##緣 +##編 +##緩 +##緬 +##緯 +##練 +##緹 +##緻 +##縁 +##縄 +##縈 +##縛 +##縝 +##縣 +##縫 +##縮 +##縱 +##縴 +##縷 +##總 +##績 +##繁 +##繃 +##繆 +##繇 +##繋 +##織 +##繕 +##繚 +##繞 +##繡 +##繩 +##繪 +##繫 +##繭 +##繳 +##繹 +##繼 +##繽 +##纂 +##續 +##纍 +##纏 +##纓 +##纔 +##纖 +##纜 +##纠 +##红 +##纣 +##纤 +##约 +##级 +##纨 +##纪 +##纫 +##纬 +##纭 +##纯 +##纰 +##纱 +##纲 +##纳 +##纵 +##纶 +##纷 +##纸 +##纹 +##纺 +##纽 +##纾 +##线 +##绀 +##练 +##组 +##绅 +##细 +##织 +##终 +##绊 +##绍 +##绎 +##经 +##绑 +##绒 +##结 +##绔 +##绕 +##绘 +##给 +##绚 +##绛 +##络 +##绝 +##绞 +##统 +##绡 +##绢 +##绣 +##绥 +##绦 +##继 +##绩 +##绪 +##绫 +##续 +##绮 +##绯 +##绰 +##绳 +##维 +##绵 +##绶 +##绷 +##绸 +##绻 +##综 +##绽 +##绾 +##绿 +##缀 +##缄 +##缅 +##缆 +##缇 +##缈 +##缉 +##缎 +##缓 +##缔 +##缕 +##编 +##缘 +##缙 +##缚 +##缜 +##缝 +##缠 +##缢 +##缤 +##缥 +##缨 +##缩 +##缪 +##缭 +##缮 +##缰 +##缱 +##缴 +##缸 +##缺 +##缽 +##罂 +##罄 +##罌 +##罐 +##网 +##罔 +##罕 +##罗 +##罚 +##罡 +##罢 +##罩 +##罪 +##置 +##罰 +##署 +##罵 +##罷 +##罹 +##羁 +##羅 +##羈 +##羊 +##羌 +##美 +##羔 +##羚 +##羞 +##羟 +##羡 +##羣 +##群 +##羥 +##羧 +##羨 +##義 +##羯 +##羲 +##羸 +##羹 +##羽 +##羿 +##翁 +##翅 +##翊 
+##翌 +##翎 +##習 +##翔 +##翘 +##翟 +##翠 +##翡 +##翦 +##翩 +##翰 +##翱 +##翳 +##翹 +##翻 +##翼 +##耀 +##老 +##考 +##耄 +##者 +##耆 +##耋 +##而 +##耍 +##耐 +##耒 +##耕 +##耗 +##耘 +##耙 +##耦 +##耨 +##耳 +##耶 +##耷 +##耸 +##耻 +##耽 +##耿 +##聂 +##聆 +##聊 +##聋 +##职 +##聒 +##联 +##聖 +##聘 +##聚 +##聞 +##聪 +##聯 +##聰 +##聲 +##聳 +##聴 +##聶 +##職 +##聽 +##聾 +##聿 +##肃 +##肄 +##肅 +##肆 +##肇 +##肉 +##肋 +##肌 +##肏 +##肓 +##肖 +##肘 +##肚 +##肛 +##肝 +##肠 +##股 +##肢 +##肤 +##肥 +##肩 +##肪 +##肮 +##肯 +##肱 +##育 +##肴 +##肺 +##肽 +##肾 +##肿 +##胀 +##胁 +##胃 +##胄 +##胆 +##背 +##胍 +##胎 +##胖 +##胚 +##胛 +##胜 +##胝 +##胞 +##胡 +##胤 +##胥 +##胧 +##胫 +##胭 +##胯 +##胰 +##胱 +##胳 +##胴 +##胶 +##胸 +##胺 +##能 +##脂 +##脅 +##脆 +##脇 +##脈 +##脉 +##脊 +##脍 +##脏 +##脐 +##脑 +##脓 +##脖 +##脘 +##脚 +##脛 +##脣 +##脩 +##脫 +##脯 +##脱 +##脲 +##脳 +##脸 +##脹 +##脾 +##腆 +##腈 +##腊 +##腋 +##腌 +##腎 +##腐 +##腑 +##腓 +##腔 +##腕 +##腥 +##腦 +##腩 +##腫 +##腭 +##腮 +##腰 +##腱 +##腳 +##腴 +##腸 +##腹 +##腺 +##腻 +##腼 +##腾 +##腿 +##膀 +##膈 +##膊 +##膏 +##膑 +##膘 +##膚 +##膛 +##膜 +##膝 +##膠 +##膦 +##膨 +##膩 +##膳 +##膺 +##膻 +##膽 +##膾 +##膿 +##臀 +##臂 +##臃 +##臆 +##臉 +##臊 +##臍 +##臓 +##臘 +##臟 +##臣 +##臥 +##臧 +##臨 +##自 +##臬 +##臭 +##至 +##致 +##臺 +##臻 +##臼 +##臾 +##舀 +##舂 +##舅 +##舆 +##與 +##興 +##舉 +##舊 +##舌 +##舍 +##舎 +##舐 +##舒 +##舔 +##舖 +##舗 +##舛 +##舜 +##舞 +##舟 +##航 +##舫 +##般 +##舰 +##舱 +##舵 +##舶 +##舷 +##舸 +##船 +##舺 +##舾 +##艇 +##艋 +##艘 +##艙 +##艦 +##艮 +##良 +##艰 +##艱 +##色 +##艳 +##艷 +##艹 +##艺 +##艾 +##节 +##芃 +##芈 +##芊 +##芋 +##芍 +##芎 +##芒 +##芙 +##芜 +##芝 +##芡 +##芥 +##芦 +##芩 +##芪 +##芫 +##芬 +##芭 +##芮 +##芯 +##花 +##芳 +##芷 +##芸 +##芹 +##芻 +##芽 +##芾 +##苁 +##苄 +##苇 +##苋 +##苍 +##苏 +##苑 +##苒 +##苓 +##苔 +##苕 +##苗 +##苛 +##苜 +##苞 +##苟 +##苡 +##苣 +##若 +##苦 +##苫 +##苯 +##英 +##苷 +##苹 +##苻 +##茁 +##茂 +##范 +##茄 +##茅 +##茉 +##茎 +##茏 +##茗 +##茜 +##茧 +##茨 +##茫 +##茬 +##茭 +##茯 +##茱 +##茲 +##茴 +##茵 +##茶 +##茸 +##茹 +##茼 +##荀 +##荃 +##荆 +##草 +##荊 +##荏 +##荐 +##荒 +##荔 +##荖 +##荘 +##荚 +##荞 +##荟 +##荠 +##荡 +##荣 +##荤 +##荥 +##荧 +##荨 +##荪 +##荫 +##药 +##荳 +##荷 +##荸 +##荻 +##荼 +##荽 +##莅 +##莆 +##莉 +##莊 +##莎 +##莒 +##莓 +##莖 +##莘 +##莞 +##莠 +##莢 +##莧 +##莪 +##莫 +##莱 +##莲 +##莴 +##获 +##莹 +##莺 +##莽 +##莿 +##菀 +##菁 +##菅 +##菇 +##菈 +##菊 +##菌 +##菏 +##菓 +##菖 +##菘 +##菜 +##菟 +##菠 +##菡 +##菩 +##華 +##菱 +##菲 +##菸 +##菽 +##萁 +##萃 +##萄 +##萊 +##萋 +##萌 +##萍 +##萎 +##萘 +##萝 +##萤 +##营 +##萦 +##萧 +##萨 +##萩 +##萬 +##萱 +##萵 +##萸 +##萼 +##落 +##葆 +##葉 +##著 +##葚 +##葛 +##葡 +##董 +##葦 +##葩 +##葫 +##葬 +##葭 +##葯 +##葱 +##葳 +##葵 +##葷 +##葺 +##蒂 +##蒋 +##蒐 +##蒔 +##蒙 +##蒜 +##蒞 +##蒟 +##蒡 +##蒨 +##蒲 +##蒸 +##蒹 +##蒻 +##蒼 +##蒿 +##蓁 +##蓄 +##蓆 +##蓉 +##蓋 +##蓑 +##蓓 +##蓖 +##蓝 +##蓟 +##蓦 +##蓬 +##蓮 +##蓼 +##蓿 +##蔑 +##蔓 +##蔔 +##蔗 +##蔘 +##蔚 +##蔡 +##蔣 +##蔥 +##蔫 +##蔬 +##蔭 +##蔵 +##蔷 +##蔺 +##蔻 +##蔼 +##蔽 +##蕁 +##蕃 +##蕈 +##蕉 +##蕊 +##蕎 +##蕙 +##蕤 +##蕨 +##蕩 +##蕪 +##蕭 +##蕲 +##蕴 +##蕻 +##蕾 +##薄 +##薅 +##薇 +##薈 +##薊 +##薏 +##薑 +##薔 +##薙 +##薛 +##薦 +##薨 +##薩 +##薪 +##薬 +##薯 +##薰 +##薹 +##藉 +##藍 +##藏 +##藐 +##藓 +##藕 +##藜 +##藝 +##藤 +##藥 +##藩 +##藹 +##藻 +##藿 +##蘆 +##蘇 +##蘊 +##蘋 +##蘑 +##蘚 +##蘭 +##蘸 +##蘼 +##蘿 +##虎 +##虏 +##虐 +##虑 +##虔 +##處 +##虚 +##虛 +##虜 +##虞 +##號 +##虢 +##虧 +##虫 +##虬 +##虱 +##虹 +##虻 +##虽 +##虾 +##蚀 +##蚁 +##蚂 +##蚊 +##蚌 +##蚓 +##蚕 +##蚜 +##蚝 +##蚣 +##蚤 +##蚩 +##蚪 +##蚯 +##蚱 +##蚵 +##蛀 +##蛆 +##蛇 +##蛊 +##蛋 +##蛎 +##蛐 +##蛔 +##蛙 +##蛛 +##蛟 +##蛤 +##蛭 +##蛮 +##蛰 +##蛳 +##蛹 +##蛻 +##蛾 +##蜀 +##蜂 +##蜃 +##蜆 +##蜇 +##蜈 +##蜊 +##蜍 +##蜒 +##蜓 +##蜕 +##蜗 +##蜘 +##蜚 +##蜜 +##蜡 +##蜢 +##蜥 +##蜱 +##蜴 +##蜷 +##蜻 +##蜿 +##蝇 +##蝈 +##蝉 +##蝌 +##蝎 +##蝕 +##蝗 +##蝙 +##蝟 +##蝠 +##蝦 +##蝨 +##蝴 +##蝶 +##蝸 +##蝼 +##螂 +##螃 +##融 +##螞 +##螢 +##螨 +##螯 +##螳 +##螺 +##蟀 +##蟄 +##蟆 +##蟋 +##蟎 +##蟑 +##蟒 +##蟠 +##蟬 +##蟲 +##蟹 +##蟻 +##蟾 +##蠅 +##蠍 +##蠔 +##蠕 +##蠛 +##蠟 +##蠡 +##蠢 +##蠣 +##蠱 +##蠶 +##蠹 +##蠻 +##血 +##衄 +##衅 +##衆 +##行 +##衍 +##術 +##衔 +##街 +##衙 +##衛 +##衝 +##衞 +##衡 +##衢 +##衣 
+##补 +##表 +##衩 +##衫 +##衬 +##衮 +##衰 +##衲 +##衷 +##衹 +##衾 +##衿 +##袁 +##袂 +##袄 +##袅 +##袈 +##袋 +##袍 +##袒 +##袖 +##袜 +##袞 +##袤 +##袪 +##被 +##袭 +##袱 +##裁 +##裂 +##装 +##裆 +##裊 +##裏 +##裔 +##裕 +##裘 +##裙 +##補 +##裝 +##裟 +##裡 +##裤 +##裨 +##裱 +##裳 +##裴 +##裸 +##裹 +##製 +##裾 +##褂 +##複 +##褐 +##褒 +##褓 +##褔 +##褚 +##褥 +##褪 +##褫 +##褲 +##褶 +##褻 +##襁 +##襄 +##襟 +##襠 +##襪 +##襬 +##襯 +##襲 +##西 +##要 +##覃 +##覆 +##覇 +##見 +##規 +##覓 +##視 +##覚 +##覦 +##覧 +##親 +##覬 +##観 +##覷 +##覺 +##覽 +##觀 +##见 +##观 +##规 +##觅 +##视 +##览 +##觉 +##觊 +##觎 +##觐 +##觑 +##角 +##觞 +##解 +##觥 +##触 +##觸 +##言 +##訂 +##計 +##訊 +##討 +##訓 +##訕 +##訖 +##託 +##記 +##訛 +##訝 +##訟 +##訣 +##訥 +##訪 +##設 +##許 +##訳 +##訴 +##訶 +##診 +##註 +##証 +##詆 +##詐 +##詔 +##評 +##詛 +##詞 +##詠 +##詡 +##詢 +##詣 +##試 +##詩 +##詫 +##詬 +##詭 +##詮 +##詰 +##話 +##該 +##詳 +##詹 +##詼 +##誅 +##誇 +##誉 +##誌 +##認 +##誓 +##誕 +##誘 +##語 +##誠 +##誡 +##誣 +##誤 +##誥 +##誦 +##誨 +##說 +##説 +##読 +##誰 +##課 +##誹 +##誼 +##調 +##諄 +##談 +##請 +##諏 +##諒 +##論 +##諗 +##諜 +##諡 +##諦 +##諧 +##諫 +##諭 +##諮 +##諱 +##諳 +##諷 +##諸 +##諺 +##諾 +##謀 +##謁 +##謂 +##謄 +##謊 +##謎 +##謐 +##謔 +##謗 +##謙 +##講 +##謝 +##謠 +##謨 +##謬 +##謹 +##謾 +##譁 +##證 +##譎 +##譏 +##識 +##譙 +##譚 +##譜 +##警 +##譬 +##譯 +##議 +##譲 +##譴 +##護 +##譽 +##讀 +##變 +##讓 +##讚 +##讞 +##计 +##订 +##认 +##讥 +##讧 +##讨 +##让 +##讪 +##讫 +##训 +##议 +##讯 +##记 +##讲 +##讳 +##讴 +##讶 +##讷 +##许 +##讹 +##论 +##讼 +##讽 +##设 +##访 +##诀 +##证 +##诃 +##评 +##诅 +##识 +##诈 +##诉 +##诊 +##诋 +##词 +##诏 +##译 +##试 +##诗 +##诘 +##诙 +##诚 +##诛 +##话 +##诞 +##诟 +##诠 +##诡 +##询 +##诣 +##诤 +##该 +##详 +##诧 +##诩 +##诫 +##诬 +##语 +##误 +##诰 +##诱 +##诲 +##说 +##诵 +##诶 +##请 +##诸 +##诺 +##读 +##诽 +##课 +##诿 +##谀 +##谁 +##调 +##谄 +##谅 +##谆 +##谈 +##谊 +##谋 +##谌 +##谍 +##谎 +##谏 +##谐 +##谑 +##谒 +##谓 +##谔 +##谕 +##谗 +##谘 +##谙 +##谚 +##谛 +##谜 +##谟 +##谢 +##谣 +##谤 +##谥 +##谦 +##谧 +##谨 +##谩 +##谪 +##谬 +##谭 +##谯 +##谱 +##谲 +##谴 +##谶 +##谷 +##豁 +##豆 +##豇 +##豈 +##豉 +##豊 +##豌 +##豎 +##豐 +##豔 +##豚 +##象 +##豢 +##豪 +##豫 +##豬 +##豹 +##豺 +##貂 +##貅 +##貌 +##貓 +##貔 +##貘 +##貝 +##貞 +##負 +##財 +##貢 +##貧 +##貨 +##販 +##貪 +##貫 +##責 +##貯 +##貰 +##貳 +##貴 +##貶 +##買 +##貸 +##費 +##貼 +##貽 +##貿 +##賀 +##賁 +##賂 +##賃 +##賄 +##資 +##賈 +##賊 +##賑 +##賓 +##賜 +##賞 +##賠 +##賡 +##賢 +##賣 +##賤 +##賦 +##質 +##賬 +##賭 +##賴 +##賺 +##購 +##賽 +##贅 +##贈 +##贊 +##贍 +##贏 +##贓 +##贖 +##贛 +##贝 +##贞 +##负 +##贡 +##财 +##责 +##贤 +##败 +##账 +##货 +##质 +##贩 +##贪 +##贫 +##贬 +##购 +##贮 +##贯 +##贰 +##贱 +##贲 +##贴 +##贵 +##贷 +##贸 +##费 +##贺 +##贻 +##贼 +##贾 +##贿 +##赁 +##赂 +##赃 +##资 +##赅 +##赈 +##赊 +##赋 +##赌 +##赎 +##赏 +##赐 +##赓 +##赔 +##赖 +##赘 +##赚 +##赛 +##赝 +##赞 +##赠 +##赡 +##赢 +##赣 +##赤 +##赦 +##赧 +##赫 +##赭 +##走 +##赳 +##赴 +##赵 +##赶 +##起 +##趁 +##超 +##越 +##趋 +##趕 +##趙 +##趟 +##趣 +##趨 +##足 +##趴 +##趵 +##趸 +##趺 +##趾 +##跃 +##跄 +##跆 +##跋 +##跌 +##跎 +##跑 +##跖 +##跚 +##跛 +##距 +##跟 +##跡 +##跤 +##跨 +##跩 +##跪 +##路 +##跳 +##践 +##跷 +##跹 +##跺 +##跻 +##踉 +##踊 +##踌 +##踏 +##踐 +##踝 +##踞 +##踟 +##踢 +##踩 +##踪 +##踮 +##踱 +##踴 +##踵 +##踹 +##蹂 +##蹄 +##蹇 +##蹈 +##蹉 +##蹊 +##蹋 +##蹑 +##蹒 +##蹙 +##蹟 +##蹣 +##蹤 +##蹦 +##蹩 +##蹬 +##蹭 +##蹲 +##蹴 +##蹶 +##蹺 +##蹼 +##蹿 +##躁 +##躇 +##躉 +##躊 +##躋 +##躍 +##躏 +##躪 +##身 +##躬 +##躯 +##躲 +##躺 +##軀 +##車 +##軋 +##軌 +##軍 +##軒 +##軟 +##転 +##軸 +##軼 +##軽 +##軾 +##較 +##載 +##輒 +##輓 +##輔 +##輕 +##輛 +##輝 +##輟 +##輩 +##輪 +##輯 +##輸 +##輻 +##輾 +##輿 +##轄 +##轅 +##轆 +##轉 +##轍 +##轎 +##轟 +##车 +##轧 +##轨 +##轩 +##转 +##轭 +##轮 +##软 +##轰 +##轲 +##轴 +##轶 +##轻 +##轼 +##载 +##轿 +##较 +##辄 +##辅 +##辆 +##辇 +##辈 +##辉 +##辊 +##辍 +##辐 +##辑 +##输 +##辕 +##辖 +##辗 +##辘 +##辙 +##辛 +##辜 +##辞 +##辟 +##辣 +##辦 +##辨 +##辩 +##辫 +##辭 +##辮 +##辯 +##辰 +##辱 +##農 +##边 +##辺 +##辻 +##込 +##辽 +##达 +##迁 +##迂 +##迄 +##迅 +##过 +##迈 +##迎 +##运 +##近 +##返 +##还 +##这 +##进 +##远 +##违 +##连 +##迟 +##迢 +##迤 +##迥 +##迦 +##迩 +##迪 +##迫 +##迭 +##述 +##迴 +##迷 +##迸 +##迹 +##迺 +##追 +##退 +##送 
+##适 +##逃 +##逅 +##逆 +##选 +##逊 +##逍 +##透 +##逐 +##递 +##途 +##逕 +##逗 +##這 +##通 +##逛 +##逝 +##逞 +##速 +##造 +##逢 +##連 +##逮 +##週 +##進 +##逵 +##逶 +##逸 +##逻 +##逼 +##逾 +##遁 +##遂 +##遅 +##遇 +##遊 +##運 +##遍 +##過 +##遏 +##遐 +##遑 +##遒 +##道 +##達 +##違 +##遗 +##遙 +##遛 +##遜 +##遞 +##遠 +##遢 +##遣 +##遥 +##遨 +##適 +##遭 +##遮 +##遲 +##遴 +##遵 +##遶 +##遷 +##選 +##遺 +##遼 +##遽 +##避 +##邀 +##邁 +##邂 +##邃 +##還 +##邇 +##邈 +##邊 +##邋 +##邏 +##邑 +##邓 +##邕 +##邛 +##邝 +##邢 +##那 +##邦 +##邨 +##邪 +##邬 +##邮 +##邯 +##邰 +##邱 +##邳 +##邵 +##邸 +##邹 +##邺 +##邻 +##郁 +##郅 +##郊 +##郎 +##郑 +##郜 +##郝 +##郡 +##郢 +##郤 +##郦 +##郧 +##部 +##郫 +##郭 +##郴 +##郵 +##郷 +##郸 +##都 +##鄂 +##鄉 +##鄒 +##鄔 +##鄙 +##鄞 +##鄢 +##鄧 +##鄭 +##鄰 +##鄱 +##鄲 +##鄺 +##酉 +##酊 +##酋 +##酌 +##配 +##酐 +##酒 +##酗 +##酚 +##酝 +##酢 +##酣 +##酥 +##酩 +##酪 +##酬 +##酮 +##酯 +##酰 +##酱 +##酵 +##酶 +##酷 +##酸 +##酿 +##醃 +##醇 +##醉 +##醋 +##醍 +##醐 +##醒 +##醚 +##醛 +##醜 +##醞 +##醣 +##醪 +##醫 +##醬 +##醮 +##醯 +##醴 +##醺 +##釀 +##釁 +##采 +##釉 +##释 +##釋 +##里 +##重 +##野 +##量 +##釐 +##金 +##釗 +##釘 +##釜 +##針 +##釣 +##釦 +##釧 +##釵 +##鈀 +##鈉 +##鈍 +##鈎 +##鈔 +##鈕 +##鈞 +##鈣 +##鈦 +##鈪 +##鈴 +##鈺 +##鈾 +##鉀 +##鉄 +##鉅 +##鉉 +##鉑 +##鉗 +##鉚 +##鉛 +##鉤 +##鉴 +##鉻 +##銀 +##銃 +##銅 +##銑 +##銓 +##銖 +##銘 +##銜 +##銬 +##銭 +##銮 +##銳 +##銷 +##銹 +##鋁 +##鋅 +##鋒 +##鋤 +##鋪 +##鋰 +##鋸 +##鋼 +##錄 +##錐 +##錘 +##錚 +##錠 +##錢 +##錦 +##錨 +##錫 +##錮 +##錯 +##録 +##錳 +##錶 +##鍊 +##鍋 +##鍍 +##鍛 +##鍥 +##鍰 +##鍵 +##鍺 +##鍾 +##鎂 +##鎊 +##鎌 +##鎏 +##鎔 +##鎖 +##鎗 +##鎚 +##鎧 +##鎬 +##鎮 +##鎳 +##鏈 +##鏖 +##鏗 +##鏘 +##鏞 +##鏟 +##鏡 +##鏢 +##鏤 +##鏽 +##鐘 +##鐮 +##鐲 +##鐳 +##鐵 +##鐸 +##鐺 +##鑄 +##鑊 +##鑑 +##鑒 +##鑣 +##鑫 +##鑰 +##鑲 +##鑼 +##鑽 +##鑾 +##鑿 +##针 +##钉 +##钊 +##钎 +##钏 +##钒 +##钓 +##钗 +##钙 +##钛 +##钜 +##钝 +##钞 +##钟 +##钠 +##钡 +##钢 +##钣 +##钤 +##钥 +##钦 +##钧 +##钨 +##钩 +##钮 +##钯 +##钰 +##钱 +##钳 +##钴 +##钵 +##钺 +##钻 +##钼 +##钾 +##钿 +##铀 +##铁 +##铂 +##铃 +##铄 +##铅 +##铆 +##铉 +##铎 +##铐 +##铛 +##铜 +##铝 +##铠 +##铡 +##铢 +##铣 +##铤 +##铨 +##铩 +##铬 +##铭 +##铮 +##铰 +##铲 +##铵 +##银 +##铸 +##铺 +##链 +##铿 +##销 +##锁 +##锂 +##锄 +##锅 +##锆 +##锈 +##锉 +##锋 +##锌 +##锏 +##锐 +##锑 +##错 +##锚 +##锟 +##锡 +##锢 +##锣 +##锤 +##锥 +##锦 +##锭 +##键 +##锯 +##锰 +##锲 +##锵 +##锹 +##锺 +##锻 +##镀 +##镁 +##镂 +##镇 +##镉 +##镌 +##镍 +##镐 +##镑 +##镕 +##镖 +##镗 +##镛 +##镜 +##镣 +##镭 +##镯 +##镰 +##镳 +##镶 +##長 +##长 +##門 +##閃 +##閉 +##開 +##閎 +##閏 +##閑 +##閒 +##間 +##閔 +##閘 +##閡 +##関 +##閣 +##閥 +##閨 +##閩 +##閱 +##閲 +##閹 +##閻 +##閾 +##闆 +##闇 +##闊 +##闌 +##闍 +##闔 +##闕 +##闖 +##闘 +##關 +##闡 +##闢 +##门 +##闪 +##闫 +##闭 +##问 +##闯 +##闰 +##闲 +##间 +##闵 +##闷 +##闸 +##闹 +##闺 +##闻 +##闽 +##闾 +##阀 +##阁 +##阂 +##阅 +##阆 +##阇 +##阈 +##阉 +##阎 +##阐 +##阑 +##阔 +##阕 +##阖 +##阙 +##阚 +##阜 +##队 +##阡 +##阪 +##阮 +##阱 +##防 +##阳 +##阴 +##阵 +##阶 +##阻 +##阿 +##陀 +##陂 +##附 +##际 +##陆 +##陇 +##陈 +##陋 +##陌 +##降 +##限 +##陕 +##陛 +##陝 +##陞 +##陟 +##陡 +##院 +##陣 +##除 +##陨 +##险 +##陪 +##陰 +##陲 +##陳 +##陵 +##陶 +##陷 +##陸 +##険 +##陽 +##隅 +##隆 +##隈 +##隊 +##隋 +##隍 +##階 +##随 +##隐 +##隔 +##隕 +##隘 +##隙 +##際 +##障 +##隠 +##隣 +##隧 +##隨 +##險 +##隱 +##隴 +##隶 +##隸 +##隻 +##隼 +##隽 +##难 +##雀 +##雁 +##雄 +##雅 +##集 +##雇 +##雉 +##雋 +##雌 +##雍 +##雎 +##雏 +##雑 +##雒 +##雕 +##雖 +##雙 +##雛 +##雜 +##雞 +##離 +##難 +##雨 +##雪 +##雯 +##雰 +##雲 +##雳 +##零 +##雷 +##雹 +##電 +##雾 +##需 +##霁 +##霄 +##霆 +##震 +##霈 +##霉 +##霊 +##霍 +##霎 +##霏 +##霑 +##霓 +##霖 +##霜 +##霞 +##霧 +##霭 +##霰 +##露 +##霸 +##霹 +##霽 +##霾 +##靂 +##靄 +##靈 +##青 +##靓 +##靖 +##静 +##靚 +##靛 +##靜 +##非 +##靠 +##靡 +##面 +##靥 +##靦 +##革 +##靳 +##靴 +##靶 +##靼 +##鞅 +##鞋 +##鞍 +##鞏 +##鞑 +##鞘 +##鞠 +##鞣 +##鞦 +##鞭 +##韆 +##韋 +##韌 +##韓 +##韜 +##韦 +##韧 +##韩 +##韬 +##韭 +##音 +##韵 +##韶 +##韻 +##響 +##頁 +##頂 +##頃 +##項 +##順 +##須 +##頌 +##預 +##頑 +##頒 +##頓 +##頗 +##領 +##頜 +##頡 +##頤 +##頫 +##頭 +##頰 +##頷 +##頸 +##頹 +##頻 +##頼 +##顆 +##題 +##額 +##顎 +##顏 +##顔 +##願 +##顛 +##類 +##顧 +##顫 +##顯 +##顱 +##顴 +##页 +##顶 +##顷 
+##项 +##顺 +##须 +##顼 +##顽 +##顾 +##顿 +##颁 +##颂 +##预 +##颅 +##领 +##颇 +##颈 +##颉 +##颊 +##颌 +##颍 +##颐 +##频 +##颓 +##颔 +##颖 +##颗 +##题 +##颚 +##颛 +##颜 +##额 +##颞 +##颠 +##颡 +##颢 +##颤 +##颦 +##颧 +##風 +##颯 +##颱 +##颳 +##颶 +##颼 +##飄 +##飆 +##风 +##飒 +##飓 +##飕 +##飘 +##飙 +##飚 +##飛 +##飞 +##食 +##飢 +##飨 +##飩 +##飪 +##飯 +##飲 +##飼 +##飽 +##飾 +##餃 +##餅 +##餉 +##養 +##餌 +##餐 +##餒 +##餓 +##餘 +##餚 +##餛 +##餞 +##餡 +##館 +##餮 +##餵 +##餾 +##饅 +##饈 +##饋 +##饌 +##饍 +##饑 +##饒 +##饕 +##饗 +##饞 +##饥 +##饨 +##饪 +##饬 +##饭 +##饮 +##饯 +##饰 +##饱 +##饲 +##饴 +##饵 +##饶 +##饷 +##饺 +##饼 +##饽 +##饿 +##馀 +##馁 +##馄 +##馅 +##馆 +##馈 +##馋 +##馍 +##馏 +##馒 +##馔 +##首 +##馗 +##香 +##馥 +##馨 +##馬 +##馭 +##馮 +##馳 +##馴 +##駁 +##駄 +##駅 +##駆 +##駐 +##駒 +##駕 +##駛 +##駝 +##駭 +##駱 +##駿 +##騁 +##騎 +##騏 +##験 +##騙 +##騨 +##騰 +##騷 +##驀 +##驅 +##驊 +##驍 +##驒 +##驕 +##驗 +##驚 +##驛 +##驟 +##驢 +##驥 +##马 +##驭 +##驮 +##驯 +##驰 +##驱 +##驳 +##驴 +##驶 +##驷 +##驸 +##驹 +##驻 +##驼 +##驾 +##驿 +##骁 +##骂 +##骄 +##骅 +##骆 +##骇 +##骈 +##骊 +##骋 +##验 +##骏 +##骐 +##骑 +##骗 +##骚 +##骛 +##骜 +##骞 +##骠 +##骡 +##骤 +##骥 +##骧 +##骨 +##骯 +##骰 +##骶 +##骷 +##骸 +##骼 +##髂 +##髅 +##髋 +##髏 +##髒 +##髓 +##體 +##髖 +##高 +##髦 +##髪 +##髮 +##髯 +##髻 +##鬃 +##鬆 +##鬍 +##鬓 +##鬚 +##鬟 +##鬢 +##鬣 +##鬥 +##鬧 +##鬱 +##鬼 +##魁 +##魂 +##魄 +##魅 +##魇 +##魍 +##魏 +##魔 +##魘 +##魚 +##魯 +##魷 +##鮑 +##鮨 +##鮪 +##鮭 +##鮮 +##鯉 +##鯊 +##鯖 +##鯛 +##鯨 +##鯰 +##鯽 +##鰍 +##鰓 +##鰭 +##鰲 +##鰻 +##鰾 +##鱈 +##鱉 +##鱔 +##鱗 +##鱷 +##鱸 +##鱼 +##鱿 +##鲁 +##鲈 +##鲍 +##鲑 +##鲛 +##鲜 +##鲟 +##鲢 +##鲤 +##鲨 +##鲫 +##鲱 +##鲲 +##鲶 +##鲷 +##鲸 +##鳃 +##鳄 +##鳅 +##鳌 +##鳍 +##鳕 +##鳖 +##鳗 +##鳝 +##鳞 +##鳥 +##鳩 +##鳳 +##鳴 +##鳶 +##鴉 +##鴕 +##鴛 +##鴦 +##鴨 +##鴻 +##鴿 +##鵑 +##鵜 +##鵝 +##鵡 +##鵬 +##鵰 +##鵲 +##鶘 +##鶩 +##鶯 +##鶴 +##鷗 +##鷲 +##鷹 +##鷺 +##鸚 +##鸞 +##鸟 +##鸠 +##鸡 +##鸢 +##鸣 +##鸥 +##鸦 +##鸨 +##鸪 +##鸭 +##鸯 +##鸳 +##鸵 +##鸽 +##鸾 +##鸿 +##鹂 +##鹃 +##鹄 +##鹅 +##鹈 +##鹉 +##鹊 +##鹌 +##鹏 +##鹑 +##鹕 +##鹘 +##鹜 +##鹞 +##鹤 +##鹦 +##鹧 +##鹫 +##鹭 +##鹰 +##鹳 +##鹵 +##鹹 +##鹼 +##鹽 +##鹿 +##麂 +##麋 +##麒 +##麓 +##麗 +##麝 +##麟 +##麥 +##麦 +##麩 +##麴 +##麵 +##麸 +##麺 +##麻 +##麼 +##麽 +##麾 +##黃 +##黄 +##黍 +##黎 +##黏 +##黑 +##黒 +##黔 +##默 +##黛 +##黜 +##黝 +##點 +##黠 +##黨 +##黯 +##黴 +##鼋 +##鼎 +##鼐 +##鼓 +##鼠 +##鼬 +##鼹 +##鼻 +##鼾 +##齁 +##齊 +##齋 +##齐 +##齒 +##齡 +##齢 +##齣 +##齦 +##齿 +##龄 +##龅 +##龈 +##龊 +##龋 +##龌 +##龍 +##龐 +##龔 +##龕 +##龙 +##龚 +##龛 +##龜 +##龟 +##︰ +##︱ +##︶ +##︿ +##﹁ +##﹂ +##﹍ +##﹏ +##﹐ +##﹑ +##﹒ +##﹔ +##﹕ +##﹖ +##﹗ +##﹙ +##﹚ +##﹝ +##﹞ +##﹡ +##﹣ +##! +##" +### +##$ +##% +##& +##' +##( +##) +##* +##, +##- +##. +##/ +##: +##; +##< +##? +##@ +##[ +##\ +##] +##^ +##_ +##` +##f +##h +##j +##u +##w +##z +##{ +##} +##。 +##「 +##」 +##、 +##・ +##ッ +##ー +##イ +##ク +##シ +##ス +##ト +##ノ +##フ +##ラ +##ル +##ン +##゙ +##゚ +## ̄ +##¥ +##👍 +##🔥 +##😂 +##😎 diff --git a/create_pretraining_data.py b/create_pretraining_data.py new file mode 100644 index 0000000..5340d96 --- /dev/null +++ b/create_pretraining_data.py @@ -0,0 +1,469 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Create masked LM/next sentence masked_lm TF examples for BERT.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import random +import tokenization +import tensorflow as tf + +flags = tf.flags + +FLAGS = flags.FLAGS + +flags.DEFINE_string("input_file", None, + "Input raw text file (or comma-separated list of files).") + +flags.DEFINE_string( + "output_file", None, + "Output TF example file (or comma-separated list of files).") + +flags.DEFINE_string("vocab_file", None, + "The vocabulary file that the BERT model was trained on.") + +flags.DEFINE_bool( + "do_lower_case", True, + "Whether to lower case the input text. Should be True for uncased " + "models and False for cased models.") + +flags.DEFINE_bool( + "do_whole_word_mask", False, + "Whether to use whole word masking rather than per-WordPiece masking.") + +flags.DEFINE_integer("max_seq_length", 128, "Maximum sequence length.") + +flags.DEFINE_integer("max_predictions_per_seq", 20, + "Maximum number of masked LM predictions per sequence.") + +flags.DEFINE_integer("random_seed", 12345, "Random seed for data generation.") + +flags.DEFINE_integer( + "dupe_factor", 10, + "Number of times to duplicate the input data (with different masks).") + +flags.DEFINE_float("masked_lm_prob", 0.15, "Masked LM probability.") + +flags.DEFINE_float( + "short_seq_prob", 0.1, + "Probability of creating sequences which are shorter than the " + "maximum length.") + + +class TrainingInstance(object): + """A single training instance (sentence pair).""" + + def __init__(self, tokens, segment_ids, masked_lm_positions, masked_lm_labels, + is_random_next): + self.tokens = tokens + self.segment_ids = segment_ids + self.is_random_next = is_random_next + self.masked_lm_positions = masked_lm_positions + self.masked_lm_labels = masked_lm_labels + + def __str__(self): + s = "" + s += "tokens: %s\n" % (" ".join( + [tokenization.printable_text(x) for x in self.tokens])) + s += "segment_ids: %s\n" % (" ".join([str(x) for x in self.segment_ids])) + s += "is_random_next: %s\n" % self.is_random_next + s += "masked_lm_positions: %s\n" % (" ".join( + [str(x) for x in self.masked_lm_positions])) + s += "masked_lm_labels: %s\n" % (" ".join( + [tokenization.printable_text(x) for x in self.masked_lm_labels])) + s += "\n" + return s + + def __repr__(self): + return self.__str__() + + +def write_instance_to_example_files(instances, tokenizer, max_seq_length, + max_predictions_per_seq, output_files): + """Create TF example files from `TrainingInstance`s.""" + writers = [] + for output_file in output_files: + writers.append(tf.python_io.TFRecordWriter(output_file)) + + writer_index = 0 + + total_written = 0 + for (inst_index, instance) in enumerate(instances): + input_ids = tokenizer.convert_tokens_to_ids(instance.tokens) + input_mask = [1] * len(input_ids) + segment_ids = list(instance.segment_ids) + assert len(input_ids) <= max_seq_length + + while len(input_ids) < max_seq_length: + input_ids.append(0) + input_mask.append(0) + segment_ids.append(0) + + assert len(input_ids) == max_seq_length + assert len(input_mask) == max_seq_length + assert len(segment_ids) == max_seq_length + + masked_lm_positions = list(instance.masked_lm_positions) + masked_lm_ids = tokenizer.convert_tokens_to_ids(instance.masked_lm_labels) + masked_lm_weights = [1.0] * len(masked_lm_ids) + + while len(masked_lm_positions) < max_predictions_per_seq: + masked_lm_positions.append(0) + masked_lm_ids.append(0) + 
masked_lm_weights.append(0.0) + + next_sentence_label = 1 if instance.is_random_next else 0 + + features = collections.OrderedDict() + features["input_ids"] = create_int_feature(input_ids) + features["input_mask"] = create_int_feature(input_mask) + features["segment_ids"] = create_int_feature(segment_ids) + features["masked_lm_positions"] = create_int_feature(masked_lm_positions) + features["masked_lm_ids"] = create_int_feature(masked_lm_ids) + features["masked_lm_weights"] = create_float_feature(masked_lm_weights) + features["next_sentence_labels"] = create_int_feature([next_sentence_label]) + + tf_example = tf.train.Example(features=tf.train.Features(feature=features)) + + writers[writer_index].write(tf_example.SerializeToString()) + writer_index = (writer_index + 1) % len(writers) + + total_written += 1 + + if inst_index < 20: + tf.logging.info("*** Example ***") + tf.logging.info("tokens: %s" % " ".join( + [tokenization.printable_text(x) for x in instance.tokens])) + + for feature_name in features.keys(): + feature = features[feature_name] + values = [] + if feature.int64_list.value: + values = feature.int64_list.value + elif feature.float_list.value: + values = feature.float_list.value + tf.logging.info( + "%s: %s" % (feature_name, " ".join([str(x) for x in values]))) + + for writer in writers: + writer.close() + + tf.logging.info("Wrote %d total instances", total_written) + + +def create_int_feature(values): + feature = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values))) + return feature + + +def create_float_feature(values): + feature = tf.train.Feature(float_list=tf.train.FloatList(value=list(values))) + return feature + + +def create_training_instances(input_files, tokenizer, max_seq_length, + dupe_factor, short_seq_prob, masked_lm_prob, + max_predictions_per_seq, rng): + """Create `TrainingInstance`s from raw text.""" + all_documents = [[]] + + # Input file format: + # (1) One sentence per line. These should ideally be actual sentences, not + # entire paragraphs or arbitrary spans of text. (Because we use the + # sentence boundaries for the "next sentence prediction" task). + # (2) Blank lines between documents. Document boundaries are needed so + # that the "next sentence prediction" task doesn't span between documents. 
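+  # A rough illustration of the input format described above (the sentences
+  # below are invented for illustration only and are not part of any corpus
+  # shipped with this repository): two documents, one sentence per line,
+  # separated by a blank line.
+  #
+  #     The first document starts here, with one sentence per line.
+  #     This is its second sentence.
+  #
+  #     The blank line above marks the start of a second document.
+  #     Its sentence boundaries drive the next sentence prediction task.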
+ for input_file in input_files: + with tf.gfile.GFile(input_file, "r") as reader: + while True: + line = tokenization.convert_to_unicode(reader.readline()) + if not line: + break + line = line.strip() + + # Empty lines are used as document delimiters + if not line: + all_documents.append([]) + tokens = tokenizer.tokenize(line) + if tokens: + all_documents[-1].append(tokens) + + # Remove empty documents + all_documents = [x for x in all_documents if x] + rng.shuffle(all_documents) + + vocab_words = list(tokenizer.vocab.keys()) + instances = [] + for _ in range(dupe_factor): + for document_index in range(len(all_documents)): + instances.extend( + create_instances_from_document( + all_documents, document_index, max_seq_length, short_seq_prob, + masked_lm_prob, max_predictions_per_seq, vocab_words, rng)) + + rng.shuffle(instances) + return instances + + +def create_instances_from_document( + all_documents, document_index, max_seq_length, short_seq_prob, + masked_lm_prob, max_predictions_per_seq, vocab_words, rng): + """Creates `TrainingInstance`s for a single document.""" + document = all_documents[document_index] + + # Account for [CLS], [SEP], [SEP] + max_num_tokens = max_seq_length - 3 + + # We *usually* want to fill up the entire sequence since we are padding + # to `max_seq_length` anyways, so short sequences are generally wasted + # computation. However, we *sometimes* + # (i.e., short_seq_prob == 0.1 == 10% of the time) want to use shorter + # sequences to minimize the mismatch between pre-training and fine-tuning. + # The `target_seq_length` is just a rough target however, whereas + # `max_seq_length` is a hard limit. + target_seq_length = max_num_tokens + if rng.random() < short_seq_prob: + target_seq_length = rng.randint(2, max_num_tokens) + + # We DON'T just concatenate all of the tokens from a document into a long + # sequence and choose an arbitrary split point because this would make the + # next sentence prediction task too easy. Instead, we split the input into + # segments "A" and "B" based on the actual "sentences" provided by the user + # input. + instances = [] + current_chunk = [] + current_length = 0 + i = 0 + while i < len(document): + segment = document[i] + current_chunk.append(segment) + current_length += len(segment) + if i == len(document) - 1 or current_length >= target_seq_length: + if current_chunk: + # `a_end` is how many segments from `current_chunk` go into the `A` + # (first) sentence. + a_end = 1 + if len(current_chunk) >= 2: + a_end = rng.randint(1, len(current_chunk) - 1) + + tokens_a = [] + for j in range(a_end): + tokens_a.extend(current_chunk[j]) + + tokens_b = [] + # Random next + is_random_next = False + if len(current_chunk) == 1 or rng.random() < 0.5: + is_random_next = True + target_b_length = target_seq_length - len(tokens_a) + + # This should rarely go for more than one iteration for large + # corpora. However, just to be careful, we try to make sure that + # the random document is not the same as the document + # we're processing. + for _ in range(10): + random_document_index = rng.randint(0, len(all_documents) - 1) + if random_document_index != document_index: + break + + random_document = all_documents[random_document_index] + random_start = rng.randint(0, len(random_document) - 1) + for j in range(random_start, len(random_document)): + tokens_b.extend(random_document[j]) + if len(tokens_b) >= target_b_length: + break + # We didn't actually use these segments so we "put them back" so + # they don't go to waste. 
+ num_unused_segments = len(current_chunk) - a_end + i -= num_unused_segments + # Actual next + else: + is_random_next = False + for j in range(a_end, len(current_chunk)): + tokens_b.extend(current_chunk[j]) + truncate_seq_pair(tokens_a, tokens_b, max_num_tokens, rng) + + assert len(tokens_a) >= 1 + assert len(tokens_b) >= 1 + + tokens = [] + segment_ids = [] + tokens.append("[CLS]") + segment_ids.append(0) + for token in tokens_a: + tokens.append(token) + segment_ids.append(0) + + tokens.append("[SEP]") + segment_ids.append(0) + + for token in tokens_b: + tokens.append(token) + segment_ids.append(1) + tokens.append("[SEP]") + segment_ids.append(1) + + (tokens, masked_lm_positions, + masked_lm_labels) = create_masked_lm_predictions( + tokens, masked_lm_prob, max_predictions_per_seq, vocab_words, rng) + instance = TrainingInstance( + tokens=tokens, + segment_ids=segment_ids, + is_random_next=is_random_next, + masked_lm_positions=masked_lm_positions, + masked_lm_labels=masked_lm_labels) + instances.append(instance) + current_chunk = [] + current_length = 0 + i += 1 + + return instances + + +MaskedLmInstance = collections.namedtuple("MaskedLmInstance", + ["index", "label"]) + + +def create_masked_lm_predictions(tokens, masked_lm_prob, + max_predictions_per_seq, vocab_words, rng): + """Creates the predictions for the masked LM objective.""" + + cand_indexes = [] + for (i, token) in enumerate(tokens): + if token == "[CLS]" or token == "[SEP]": + continue + # Whole Word Masking means that if we mask all of the wordpieces + # corresponding to an original word. When a word has been split into + # WordPieces, the first token does not have any marker and any subsequence + # tokens are prefixed with ##. So whenever we see the ## token, we + # append it to the previous set of word indexes. + # + # Note that Whole Word Masking does *not* change the training code + # at all -- we still predict each WordPiece independently, softmaxed + # over the entire vocabulary. + if (FLAGS.do_whole_word_mask and len(cand_indexes) >= 1 and + token.startswith("##")): + cand_indexes[-1].append(i) + else: + cand_indexes.append([i]) + + rng.shuffle(cand_indexes) + + output_tokens = list(tokens) + + num_to_predict = min(max_predictions_per_seq, + max(1, int(round(len(tokens) * masked_lm_prob)))) + + masked_lms = [] + covered_indexes = set() + for index_set in cand_indexes: + if len(masked_lms) >= num_to_predict: + break + # If adding a whole-word mask would exceed the maximum number of + # predictions, then just skip this candidate. 
+ if len(masked_lms) + len(index_set) > num_to_predict: + continue + is_any_index_covered = False + for index in index_set: + if index in covered_indexes: + is_any_index_covered = True + break + if is_any_index_covered: + continue + for index in index_set: + covered_indexes.add(index) + + masked_token = None + # 80% of the time, replace with [MASK] + if rng.random() < 0.8: + masked_token = "[MASK]" + else: + # 10% of the time, keep original + if rng.random() < 0.5: + masked_token = tokens[index] + # 10% of the time, replace with random word + else: + masked_token = vocab_words[rng.randint(0, len(vocab_words) - 1)] + + output_tokens[index] = masked_token + + masked_lms.append(MaskedLmInstance(index=index, label=tokens[index])) + assert len(masked_lms) <= num_to_predict + masked_lms = sorted(masked_lms, key=lambda x: x.index) + + masked_lm_positions = [] + masked_lm_labels = [] + for p in masked_lms: + masked_lm_positions.append(p.index) + masked_lm_labels.append(p.label) + + return (output_tokens, masked_lm_positions, masked_lm_labels) + + +def truncate_seq_pair(tokens_a, tokens_b, max_num_tokens, rng): + """Truncates a pair of sequences to a maximum sequence length.""" + while True: + total_length = len(tokens_a) + len(tokens_b) + if total_length <= max_num_tokens: + break + + trunc_tokens = tokens_a if len(tokens_a) > len(tokens_b) else tokens_b + assert len(trunc_tokens) >= 1 + + # We want to sometimes truncate from the front and sometimes from the + # back to add more randomness and avoid biases. + if rng.random() < 0.5: + del trunc_tokens[0] + else: + trunc_tokens.pop() + + +def main(_): + tf.logging.set_verbosity(tf.logging.INFO) + + tokenizer = tokenization.FullTokenizer( + vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case) + + input_files = [] + for input_pattern in FLAGS.input_file.split(","): + input_files.extend(tf.gfile.Glob(input_pattern)) + + tf.logging.info("*** Reading from input files ***") + for input_file in input_files: + tf.logging.info(" %s", input_file) + + rng = random.Random(FLAGS.random_seed) + instances = create_training_instances( + input_files, tokenizer, FLAGS.max_seq_length, FLAGS.dupe_factor, + FLAGS.short_seq_prob, FLAGS.masked_lm_prob, FLAGS.max_predictions_per_seq, + rng) + + output_files = FLAGS.output_file.split(",") + tf.logging.info("*** Writing to output files ***") + for output_file in output_files: + tf.logging.info(" %s", output_file) + + write_instance_to_example_files(instances, tokenizer, FLAGS.max_seq_length, + FLAGS.max_predictions_per_seq, output_files) + + +if __name__ == "__main__": + flags.mark_flag_as_required("input_file") + flags.mark_flag_as_required("output_file") + flags.mark_flag_as_required("vocab_file") + tf.app.run() diff --git a/dealing_dataset.py b/dealing_dataset.py new file mode 100644 index 0000000..d8db627 --- /dev/null +++ b/dealing_dataset.py @@ -0,0 +1,49 @@ +import sqlite3 + +conn = sqlite3.connect(r"nlpdata.db")\ + + +def create_dataset_ep(table): + cursor = conn.cursor() + sql = "select * from " + table + " LIMIT 20" + cursor.execute(sql) + conn.commit() + + dataset = [] + + for row in cursor: + eid = row[0] + tag = row[1] + content = row[2] + if tag == "5" or tag == "4": + dataset.append([eid, 2, content]) + print(eid, 2, content) + elif tag == "1" or tag == "2": + dataset.append([eid, 0, content]) + print(eid, 0, content) + else: + dataset.append([eid, 1, content]) + print(eid, 1, content) + return dataset + + +def create_dataset_pdt(): + conn_pdt = sqlite3.connect(r".\bptdata.db") + cursor = 
conn_pdt.cursor() + sql = "select * from " + "predict_data" + cursor.execute(sql) + conn_pdt.commit() + + dataset = [] + + for row in cursor: + stnid = row[0] + text = row[1] + dataset.append([stnid, 0, text]) + print(stnid, 0, text) + + return dataset + + +if __name__ == '__main__': + print(create_dataset_ep("amki_test")) \ No newline at end of file diff --git a/extract_features.py b/extract_features.py new file mode 100644 index 0000000..60e3830 --- /dev/null +++ b/extract_features.py @@ -0,0 +1,419 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Extract pre-computed feature vectors from BERT.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import codecs +import collections +import json +import re + +import modeling +import tokenization +import tensorflow as tf + +flags = tf.flags + +FLAGS = flags.FLAGS + +flags.DEFINE_string("input_file", None, "") + +flags.DEFINE_string("output_file", None, "") + +flags.DEFINE_string("layers", "-1,-2,-3,-4", "") + +flags.DEFINE_string( + "bert_config_file", None, + "The config json file corresponding to the pre-trained BERT model. " + "This specifies the model architecture.") + +flags.DEFINE_integer( + "max_seq_length", 128, + "The maximum total input sequence length after WordPiece tokenization. " + "Sequences longer than this will be truncated, and sequences shorter " + "than this will be padded.") + +flags.DEFINE_string( + "init_checkpoint", None, + "Initial checkpoint (usually from a pre-trained BERT model).") + +flags.DEFINE_string("vocab_file", None, + "The vocabulary file that the BERT model was trained on.") + +flags.DEFINE_bool( + "do_lower_case", True, + "Whether to lower case the input text. Should be True for uncased " + "models and False for cased models.") + +flags.DEFINE_integer("batch_size", 32, "Batch size for predictions.") + +flags.DEFINE_bool("use_tpu", False, "Whether to use TPU or GPU/CPU.") + +flags.DEFINE_string("master", None, + "If using a TPU, the address of the master.") + +flags.DEFINE_integer( + "num_tpu_cores", 8, + "Only used if `use_tpu` is True. Total number of TPU cores to use.") + +flags.DEFINE_bool( + "use_one_hot_embeddings", False, + "If True, tf.one_hot will be used for embedding lookups, otherwise " + "tf.nn.embedding_lookup will be used. 
On TPUs, this should be True " + "since it is much faster.") + + +class InputExample(object): + + def __init__(self, unique_id, text_a, text_b): + self.unique_id = unique_id + self.text_a = text_a + self.text_b = text_b + + +class InputFeatures(object): + """A single set of features of data.""" + + def __init__(self, unique_id, tokens, input_ids, input_mask, input_type_ids): + self.unique_id = unique_id + self.tokens = tokens + self.input_ids = input_ids + self.input_mask = input_mask + self.input_type_ids = input_type_ids + + +def input_fn_builder(features, seq_length): + """Creates an `input_fn` closure to be passed to TPUEstimator.""" + + all_unique_ids = [] + all_input_ids = [] + all_input_mask = [] + all_input_type_ids = [] + + for feature in features: + all_unique_ids.append(feature.unique_id) + all_input_ids.append(feature.input_ids) + all_input_mask.append(feature.input_mask) + all_input_type_ids.append(feature.input_type_ids) + + def input_fn(params): + """The actual input function.""" + batch_size = params["batch_size"] + + num_examples = len(features) + + # This is for demo purposes and does NOT scale to large data sets. We do + # not use Dataset.from_generator() because that uses tf.py_func which is + # not TPU compatible. The right way to load data is with TFRecordReader. + d = tf.data.Dataset.from_tensor_slices({ + "unique_ids": + tf.constant(all_unique_ids, shape=[num_examples], dtype=tf.int32), + "input_ids": + tf.constant( + all_input_ids, shape=[num_examples, seq_length], + dtype=tf.int32), + "input_mask": + tf.constant( + all_input_mask, + shape=[num_examples, seq_length], + dtype=tf.int32), + "input_type_ids": + tf.constant( + all_input_type_ids, + shape=[num_examples, seq_length], + dtype=tf.int32), + }) + + d = d.batch(batch_size=batch_size, drop_remainder=False) + return d + + return input_fn + + +def model_fn_builder(bert_config, init_checkpoint, layer_indexes, use_tpu, + use_one_hot_embeddings): + """Returns `model_fn` closure for TPUEstimator.""" + + def model_fn(features, labels, mode, params): # pylint: disable=unused-argument + """The `model_fn` for TPUEstimator.""" + + unique_ids = features["unique_ids"] + input_ids = features["input_ids"] + input_mask = features["input_mask"] + input_type_ids = features["input_type_ids"] + + model = modeling.BertModel( + config=bert_config, + is_training=False, + input_ids=input_ids, + input_mask=input_mask, + token_type_ids=input_type_ids, + use_one_hot_embeddings=use_one_hot_embeddings) + + if mode != tf.estimator.ModeKeys.PREDICT: + raise ValueError("Only PREDICT modes are supported: %s" % (mode)) + + tvars = tf.trainable_variables() + scaffold_fn = None + (assignment_map, + initialized_variable_names) = modeling.get_assignment_map_from_checkpoint( + tvars, init_checkpoint) + if use_tpu: + + def tpu_scaffold(): + tf.train.init_from_checkpoint(init_checkpoint, assignment_map) + return tf.train.Scaffold() + + scaffold_fn = tpu_scaffold + else: + tf.train.init_from_checkpoint(init_checkpoint, assignment_map) + + tf.logging.info("**** Trainable Variables ****") + for var in tvars: + init_string = "" + if var.name in initialized_variable_names: + init_string = ", *INIT_FROM_CKPT*" + tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape, + init_string) + + all_layers = model.get_all_encoder_layers() + + predictions = { + "unique_id": unique_ids, + } + + for (i, layer_index) in enumerate(layer_indexes): + predictions["layer_output_%d" % i] = all_layers[layer_index] + + output_spec = tf.contrib.tpu.TPUEstimatorSpec( + 
mode=mode, predictions=predictions, scaffold_fn=scaffold_fn) + return output_spec + + return model_fn + + +def convert_examples_to_features(examples, seq_length, tokenizer): + """Loads a data file into a list of `InputBatch`s.""" + + features = [] + for (ex_index, example) in enumerate(examples): + tokens_a = tokenizer.tokenize(example.text_a) + + tokens_b = None + if example.text_b: + tokens_b = tokenizer.tokenize(example.text_b) + + if tokens_b: + # Modifies `tokens_a` and `tokens_b` in place so that the total + # length is less than the specified length. + # Account for [CLS], [SEP], [SEP] with "- 3" + _truncate_seq_pair(tokens_a, tokens_b, seq_length - 3) + else: + # Account for [CLS] and [SEP] with "- 2" + if len(tokens_a) > seq_length - 2: + tokens_a = tokens_a[0:(seq_length - 2)] + + # The convention in BERT is: + # (a) For sequence pairs: + # tokens: [CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP] + # type_ids: 0 0 0 0 0 0 0 0 1 1 1 1 1 1 + # (b) For single sequences: + # tokens: [CLS] the dog is hairy . [SEP] + # type_ids: 0 0 0 0 0 0 0 + # + # Where "type_ids" are used to indicate whether this is the first + # sequence or the second sequence. The embedding vectors for `type=0` and + # `type=1` were learned during pre-training and are added to the wordpiece + # embedding vector (and position vector). This is not *strictly* necessary + # since the [SEP] token unambiguously separates the sequences, but it makes + # it easier for the model to learn the concept of sequences. + # + # For classification tasks, the first vector (corresponding to [CLS]) is + # used as as the "sentence vector". Note that this only makes sense because + # the entire model is fine-tuned. + tokens = [] + input_type_ids = [] + tokens.append("[CLS]") + input_type_ids.append(0) + for token in tokens_a: + tokens.append(token) + input_type_ids.append(0) + tokens.append("[SEP]") + input_type_ids.append(0) + + if tokens_b: + for token in tokens_b: + tokens.append(token) + input_type_ids.append(1) + tokens.append("[SEP]") + input_type_ids.append(1) + + input_ids = tokenizer.convert_tokens_to_ids(tokens) + + # The mask has 1 for real tokens and 0 for padding tokens. Only real + # tokens are attended to. + input_mask = [1] * len(input_ids) + + # Zero-pad up to the sequence length. + while len(input_ids) < seq_length: + input_ids.append(0) + input_mask.append(0) + input_type_ids.append(0) + + assert len(input_ids) == seq_length + assert len(input_mask) == seq_length + assert len(input_type_ids) == seq_length + + if ex_index < 5: + tf.logging.info("*** Example ***") + tf.logging.info("unique_id: %s" % (example.unique_id)) + tf.logging.info("tokens: %s" % " ".join( + [tokenization.printable_text(x) for x in tokens])) + tf.logging.info("input_ids: %s" % " ".join([str(x) for x in input_ids])) + tf.logging.info("input_mask: %s" % " ".join([str(x) for x in input_mask])) + tf.logging.info( + "input_type_ids: %s" % " ".join([str(x) for x in input_type_ids])) + + features.append( + InputFeatures( + unique_id=example.unique_id, + tokens=tokens, + input_ids=input_ids, + input_mask=input_mask, + input_type_ids=input_type_ids)) + return features + + +def _truncate_seq_pair(tokens_a, tokens_b, max_length): + """Truncates a sequence pair in place to the maximum length.""" + + # This is a simple heuristic which will always truncate the longer sequence + # one token at a time. 
This makes more sense than truncating an equal percent + # of tokens from each, since if one sequence is very short then each token + # that's truncated likely contains more information than a longer sequence. + while True: + total_length = len(tokens_a) + len(tokens_b) + if total_length <= max_length: + break + if len(tokens_a) > len(tokens_b): + tokens_a.pop() + else: + tokens_b.pop() + + +def read_examples(input_file): + """Read a list of `InputExample`s from an input file.""" + examples = [] + unique_id = 0 + with tf.gfile.GFile(input_file, "r") as reader: + while True: + line = tokenization.convert_to_unicode(reader.readline()) + if not line: + break + line = line.strip() + text_a = None + text_b = None + m = re.match(r"^(.*) \|\|\| (.*)$", line) + if m is None: + text_a = line + else: + text_a = m.group(1) + text_b = m.group(2) + examples.append( + InputExample(unique_id=unique_id, text_a=text_a, text_b=text_b)) + unique_id += 1 + return examples + + +def main(_): + tf.logging.set_verbosity(tf.logging.INFO) + + layer_indexes = [int(x) for x in FLAGS.layers.split(",")] + + bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file) + + tokenizer = tokenization.FullTokenizer( + vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case) + + is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2 + run_config = tf.contrib.tpu.RunConfig( + master=FLAGS.master, + tpu_config=tf.contrib.tpu.TPUConfig( + num_shards=FLAGS.num_tpu_cores, + per_host_input_for_training=is_per_host)) + + examples = read_examples(FLAGS.input_file) + + features = convert_examples_to_features( + examples=examples, seq_length=FLAGS.max_seq_length, tokenizer=tokenizer) + + unique_id_to_feature = {} + for feature in features: + unique_id_to_feature[feature.unique_id] = feature + + model_fn = model_fn_builder( + bert_config=bert_config, + init_checkpoint=FLAGS.init_checkpoint, + layer_indexes=layer_indexes, + use_tpu=FLAGS.use_tpu, + use_one_hot_embeddings=FLAGS.use_one_hot_embeddings) + + # If TPU is not available, this will fall back to normal Estimator on CPU + # or GPU. 
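+  # For reference, a typical invocation of this script looks roughly like the
+  # following (all paths are placeholders, not files assumed to exist here):
+  #   python extract_features.py --input_file=sentences.txt \
+  #     --output_file=features.jsonl --vocab_file=$BERT_DIR/vocab.txt \
+  #     --bert_config_file=$BERT_DIR/bert_config.json \
+  #     --init_checkpoint=$BERT_DIR/bert_model.ckpt --layers=-1,-2,-3,-4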
+ estimator = tf.contrib.tpu.TPUEstimator( + use_tpu=FLAGS.use_tpu, + model_fn=model_fn, + config=run_config, + predict_batch_size=FLAGS.batch_size) + + input_fn = input_fn_builder( + features=features, seq_length=FLAGS.max_seq_length) + + with codecs.getwriter("utf-8")(tf.gfile.Open(FLAGS.output_file, + "w")) as writer: + for result in estimator.predict(input_fn, yield_single_examples=True): + unique_id = int(result["unique_id"]) + feature = unique_id_to_feature[unique_id] + output_json = collections.OrderedDict() + output_json["linex_index"] = unique_id + all_features = [] + for (i, token) in enumerate(feature.tokens): + all_layers = [] + for (j, layer_index) in enumerate(layer_indexes): + layer_output = result["layer_output_%d" % j] + layers = collections.OrderedDict() + layers["index"] = layer_index + layers["values"] = [ + round(float(x), 6) for x in layer_output[i:(i + 1)].flat + ] + all_layers.append(layers) + features = collections.OrderedDict() + features["token"] = token + features["layers"] = all_layers + all_features.append(features) + output_json["features"] = all_features + writer.write(json.dumps(output_json) + "\n") + + +if __name__ == "__main__": + flags.mark_flag_as_required("input_file") + flags.mark_flag_as_required("vocab_file") + flags.mark_flag_as_required("bert_config_file") + flags.mark_flag_as_required("init_checkpoint") + flags.mark_flag_as_required("output_file") + tf.app.run() diff --git a/modeling.py b/modeling.py new file mode 100644 index 0000000..fed5259 --- /dev/null +++ b/modeling.py @@ -0,0 +1,986 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""The main BERT model and related functions.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import copy +import json +import math +import re +import numpy as np +import six +import tensorflow as tf + + +class BertConfig(object): + """Configuration for `BertModel`.""" + + def __init__(self, + vocab_size, + hidden_size=768, + num_hidden_layers=12, + num_attention_heads=12, + intermediate_size=3072, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=16, + initializer_range=0.02): + """Constructs BertConfig. + + Args: + vocab_size: Vocabulary size of `inputs_ids` in `BertModel`. + hidden_size: Size of the encoder layers and the pooler layer. + num_hidden_layers: Number of hidden layers in the Transformer encoder. + num_attention_heads: Number of attention heads for each attention layer in + the Transformer encoder. + intermediate_size: The size of the "intermediate" (i.e., feed-forward) + layer in the Transformer encoder. + hidden_act: The non-linear activation function (function or string) in the + encoder and pooler. + hidden_dropout_prob: The dropout probability for all fully connected + layers in the embeddings, encoder, and pooler. 
+ attention_probs_dropout_prob: The dropout ratio for the attention + probabilities. + max_position_embeddings: The maximum sequence length that this model might + ever be used with. Typically set this to something large just in case + (e.g., 512 or 1024 or 2048). + type_vocab_size: The vocabulary size of the `token_type_ids` passed into + `BertModel`. + initializer_range: The stdev of the truncated_normal_initializer for + initializing all weight matrices. + """ + self.vocab_size = vocab_size + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.hidden_act = hidden_act + self.intermediate_size = intermediate_size + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.max_position_embeddings = max_position_embeddings + self.type_vocab_size = type_vocab_size + self.initializer_range = initializer_range + + @classmethod + def from_dict(cls, json_object): + """Constructs a `BertConfig` from a Python dictionary of parameters.""" + config = BertConfig(vocab_size=None) + for (key, value) in six.iteritems(json_object): + config.__dict__[key] = value + return config + + @classmethod + def from_json_file(cls, json_file): + """Constructs a `BertConfig` from a json file of parameters.""" + with tf.gfile.GFile(json_file, "r") as reader: + text = reader.read() + return cls.from_dict(json.loads(text)) + + def to_dict(self): + """Serializes this instance to a Python dictionary.""" + output = copy.deepcopy(self.__dict__) + return output + + def to_json_string(self): + """Serializes this instance to a JSON string.""" + return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n" + + +class BertModel(object): + """BERT model ("Bidirectional Encoder Representations from Transformers"). + + Example usage: + + ```python + # Already been converted into WordPiece token ids + input_ids = tf.constant([[31, 51, 99], [15, 5, 0]]) + input_mask = tf.constant([[1, 1, 1], [1, 1, 0]]) + token_type_ids = tf.constant([[0, 0, 1], [0, 2, 0]]) + + config = modeling.BertConfig(vocab_size=32000, hidden_size=512, + num_hidden_layers=8, num_attention_heads=6, intermediate_size=1024) + + model = modeling.BertModel(config=config, is_training=True, + input_ids=input_ids, input_mask=input_mask, token_type_ids=token_type_ids) + + label_embeddings = tf.get_variable(...) + pooled_output = model.get_pooled_output() + logits = tf.matmul(pooled_output, label_embeddings) + ... + ``` + """ + + def __init__(self, + config, + is_training, + input_ids, + input_mask=None, + token_type_ids=None, + use_one_hot_embeddings=False, + scope=None): + """Constructor for BertModel. + + Args: + config: `BertConfig` instance. + is_training: bool. true for training model, false for eval model. Controls + whether dropout will be applied. + input_ids: int32 Tensor of shape [batch_size, seq_length]. + input_mask: (optional) int32 Tensor of shape [batch_size, seq_length]. + token_type_ids: (optional) int32 Tensor of shape [batch_size, seq_length]. + use_one_hot_embeddings: (optional) bool. Whether to use one-hot word + embeddings or tf.embedding_lookup() for the word embeddings. + scope: (optional) variable scope. Defaults to "bert". + + Raises: + ValueError: The config is invalid or one of the input tensor shapes + is invalid. 
+ """ + config = copy.deepcopy(config) + if not is_training: + config.hidden_dropout_prob = 0.0 + config.attention_probs_dropout_prob = 0.0 + + input_shape = get_shape_list(input_ids, expected_rank=2) + batch_size = input_shape[0] + seq_length = input_shape[1] + + if input_mask is None: + input_mask = tf.ones(shape=[batch_size, seq_length], dtype=tf.int32) + + if token_type_ids is None: + token_type_ids = tf.zeros(shape=[batch_size, seq_length], dtype=tf.int32) + + with tf.variable_scope(scope, default_name="bert"): + with tf.variable_scope("embeddings"): + # Perform embedding lookup on the word ids. + (self.embedding_output, self.embedding_table) = embedding_lookup( + input_ids=input_ids, + vocab_size=config.vocab_size, + embedding_size=config.hidden_size, + initializer_range=config.initializer_range, + word_embedding_name="word_embeddings", + use_one_hot_embeddings=use_one_hot_embeddings) + + # Add positional embeddings and token type embeddings, then layer + # normalize and perform dropout. + self.embedding_output = embedding_postprocessor( + input_tensor=self.embedding_output, + use_token_type=True, + token_type_ids=token_type_ids, + token_type_vocab_size=config.type_vocab_size, + token_type_embedding_name="token_type_embeddings", + use_position_embeddings=True, + position_embedding_name="position_embeddings", + initializer_range=config.initializer_range, + max_position_embeddings=config.max_position_embeddings, + dropout_prob=config.hidden_dropout_prob) + + with tf.variable_scope("encoder"): + # This converts a 2D mask of shape [batch_size, seq_length] to a 3D + # mask of shape [batch_size, seq_length, seq_length] which is used + # for the attention scores. + attention_mask = create_attention_mask_from_input_mask( + input_ids, input_mask) + + # Run the stacked transformer. + # `sequence_output` shape = [batch_size, seq_length, hidden_size]. + self.all_encoder_layers = transformer_model( + input_tensor=self.embedding_output, + attention_mask=attention_mask, + hidden_size=config.hidden_size, + num_hidden_layers=config.num_hidden_layers, + num_attention_heads=config.num_attention_heads, + intermediate_size=config.intermediate_size, + intermediate_act_fn=get_activation(config.hidden_act), + hidden_dropout_prob=config.hidden_dropout_prob, + attention_probs_dropout_prob=config.attention_probs_dropout_prob, + initializer_range=config.initializer_range, + do_return_all_layers=True) + + self.sequence_output = self.all_encoder_layers[-1] + # The "pooler" converts the encoded sequence tensor of shape + # [batch_size, seq_length, hidden_size] to a tensor of shape + # [batch_size, hidden_size]. This is necessary for segment-level + # (or segment-pair-level) classification tasks where we need a fixed + # dimensional representation of the segment. + with tf.variable_scope("pooler"): + # We "pool" the model by simply taking the hidden state corresponding + # to the first token. We assume that this has been pre-trained + first_token_tensor = tf.squeeze(self.sequence_output[:, 0:1, :], axis=1) + self.pooled_output = tf.layers.dense( + first_token_tensor, + config.hidden_size, + activation=tf.tanh, + kernel_initializer=create_initializer(config.initializer_range)) + + def get_pooled_output(self): + return self.pooled_output + + def get_sequence_output(self): + """Gets final hidden layer of encoder. + + Returns: + float Tensor of shape [batch_size, seq_length, hidden_size] corresponding + to the final hidden of the transformer encoder. 
+ """ + return self.sequence_output + + def get_all_encoder_layers(self): + return self.all_encoder_layers + + def get_embedding_output(self): + """Gets output of the embedding lookup (i.e., input to the transformer). + + Returns: + float Tensor of shape [batch_size, seq_length, hidden_size] corresponding + to the output of the embedding layer, after summing the word + embeddings with the positional embeddings and the token type embeddings, + then performing layer normalization. This is the input to the transformer. + """ + return self.embedding_output + + def get_embedding_table(self): + return self.embedding_table + + +def gelu(x): + """Gaussian Error Linear Unit. + + This is a smoother version of the RELU. + Original paper: https://arxiv.org/abs/1606.08415 + Args: + x: float Tensor to perform activation. + + Returns: + `x` with the GELU activation applied. + """ + cdf = 0.5 * (1.0 + tf.tanh( + (np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3))))) + return x * cdf + + +def get_activation(activation_string): + """Maps a string to a Python function, e.g., "relu" => `tf.nn.relu`. + + Args: + activation_string: String name of the activation function. + + Returns: + A Python function corresponding to the activation function. If + `activation_string` is None, empty, or "linear", this will return None. + If `activation_string` is not a string, it will return `activation_string`. + + Raises: + ValueError: The `activation_string` does not correspond to a known + activation. + """ + + # We assume that anything that"s not a string is already an activation + # function, so we just return it. + if not isinstance(activation_string, six.string_types): + return activation_string + + if not activation_string: + return None + + act = activation_string.lower() + if act == "linear": + return None + elif act == "relu": + return tf.nn.relu + elif act == "gelu": + return gelu + elif act == "tanh": + return tf.tanh + else: + raise ValueError("Unsupported activation: %s" % act) + + +def get_assignment_map_from_checkpoint(tvars, init_checkpoint): + """Compute the union of the current variables and checkpoint variables.""" + assignment_map = {} + initialized_variable_names = {} + + name_to_variable = collections.OrderedDict() + for var in tvars: + name = var.name + m = re.match("^(.*):\\d+$", name) + if m is not None: + name = m.group(1) + name_to_variable[name] = var + + init_vars = tf.train.list_variables(init_checkpoint) + + assignment_map = collections.OrderedDict() + for x in init_vars: + (name, var) = (x[0], x[1]) + if name not in name_to_variable: + continue + assignment_map[name] = name + initialized_variable_names[name] = 1 + initialized_variable_names[name + ":0"] = 1 + + return (assignment_map, initialized_variable_names) + + +def dropout(input_tensor, dropout_prob): + """Perform dropout. + + Args: + input_tensor: float Tensor. + dropout_prob: Python float. The probability of dropping out a value (NOT of + *keeping* a dimension as in `tf.nn.dropout`). + + Returns: + A version of `input_tensor` with dropout applied. 
+ """ + if dropout_prob is None or dropout_prob == 0.0: + return input_tensor + + output = tf.nn.dropout(input_tensor, 1.0 - dropout_prob) + return output + + +def layer_norm(input_tensor, name=None): + """Run layer normalization on the last dimension of the tensor.""" + return tf.contrib.layers.layer_norm( + inputs=input_tensor, begin_norm_axis=-1, begin_params_axis=-1, scope=name) + + +def layer_norm_and_dropout(input_tensor, dropout_prob, name=None): + """Runs layer normalization followed by dropout.""" + output_tensor = layer_norm(input_tensor, name) + output_tensor = dropout(output_tensor, dropout_prob) + return output_tensor + + +def create_initializer(initializer_range=0.02): + """Creates a `truncated_normal_initializer` with the given range.""" + return tf.truncated_normal_initializer(stddev=initializer_range) + + +def embedding_lookup(input_ids, + vocab_size, + embedding_size=128, + initializer_range=0.02, + word_embedding_name="word_embeddings", + use_one_hot_embeddings=False): + """Looks up words embeddings for id tensor. + + Args: + input_ids: int32 Tensor of shape [batch_size, seq_length] containing word + ids. + vocab_size: int. Size of the embedding vocabulary. + embedding_size: int. Width of the word embeddings. + initializer_range: float. Embedding initialization range. + word_embedding_name: string. Name of the embedding table. + use_one_hot_embeddings: bool. If True, use one-hot method for word + embeddings. If False, use `tf.gather()`. + + Returns: + float Tensor of shape [batch_size, seq_length, embedding_size]. + """ + # This function assumes that the input is of shape [batch_size, seq_length, + # num_inputs]. + # + # If the input is a 2D tensor of shape [batch_size, seq_length], we + # reshape to [batch_size, seq_length, 1]. + if input_ids.shape.ndims == 2: + input_ids = tf.expand_dims(input_ids, axis=[-1]) + + embedding_table = tf.get_variable( + name=word_embedding_name, + shape=[vocab_size, embedding_size], + initializer=create_initializer(initializer_range)) + + flat_input_ids = tf.reshape(input_ids, [-1]) + if use_one_hot_embeddings: + one_hot_input_ids = tf.one_hot(flat_input_ids, depth=vocab_size) + output = tf.matmul(one_hot_input_ids, embedding_table) + else: + output = tf.gather(embedding_table, flat_input_ids) + + input_shape = get_shape_list(input_ids) + + output = tf.reshape(output, + input_shape[0:-1] + [input_shape[-1] * embedding_size]) + return (output, embedding_table) + + +def embedding_postprocessor(input_tensor, + use_token_type=False, + token_type_ids=None, + token_type_vocab_size=16, + token_type_embedding_name="token_type_embeddings", + use_position_embeddings=True, + position_embedding_name="position_embeddings", + initializer_range=0.02, + max_position_embeddings=512, + dropout_prob=0.1): + """Performs various post-processing on a word embedding tensor. + + Args: + input_tensor: float Tensor of shape [batch_size, seq_length, + embedding_size]. + use_token_type: bool. Whether to add embeddings for `token_type_ids`. + token_type_ids: (optional) int32 Tensor of shape [batch_size, seq_length]. + Must be specified if `use_token_type` is True. + token_type_vocab_size: int. The vocabulary size of `token_type_ids`. + token_type_embedding_name: string. The name of the embedding table variable + for token type ids. + use_position_embeddings: bool. Whether to add position embeddings for the + position of each token in the sequence. + position_embedding_name: string. The name of the embedding table variable + for positional embeddings. 
+ initializer_range: float. Range of the weight initialization. + max_position_embeddings: int. Maximum sequence length that might ever be + used with this model. This can be longer than the sequence length of + input_tensor, but cannot be shorter. + dropout_prob: float. Dropout probability applied to the final output tensor. + + Returns: + float tensor with same shape as `input_tensor`. + + Raises: + ValueError: One of the tensor shapes or input values is invalid. + """ + input_shape = get_shape_list(input_tensor, expected_rank=3) + batch_size = input_shape[0] + seq_length = input_shape[1] + width = input_shape[2] + + output = input_tensor + + if use_token_type: + if token_type_ids is None: + raise ValueError("`token_type_ids` must be specified if" + "`use_token_type` is True.") + token_type_table = tf.get_variable( + name=token_type_embedding_name, + shape=[token_type_vocab_size, width], + initializer=create_initializer(initializer_range)) + # This vocab will be small so we always do one-hot here, since it is always + # faster for a small vocabulary. + flat_token_type_ids = tf.reshape(token_type_ids, [-1]) + one_hot_ids = tf.one_hot(flat_token_type_ids, depth=token_type_vocab_size) + token_type_embeddings = tf.matmul(one_hot_ids, token_type_table) + token_type_embeddings = tf.reshape(token_type_embeddings, + [batch_size, seq_length, width]) + output += token_type_embeddings + + if use_position_embeddings: + assert_op = tf.assert_less_equal(seq_length, max_position_embeddings) + with tf.control_dependencies([assert_op]): + full_position_embeddings = tf.get_variable( + name=position_embedding_name, + shape=[max_position_embeddings, width], + initializer=create_initializer(initializer_range)) + # Since the position embedding table is a learned variable, we create it + # using a (long) sequence length `max_position_embeddings`. The actual + # sequence length might be shorter than this, for faster training of + # tasks that do not have long sequences. + # + # So `full_position_embeddings` is effectively an embedding table + # for position [0, 1, 2, ..., max_position_embeddings-1], and the current + # sequence has positions [0, 1, 2, ... seq_length-1], so we can just + # perform a slice. + position_embeddings = tf.slice(full_position_embeddings, [0, 0], + [seq_length, -1]) + num_dims = len(output.shape.as_list()) + + # Only the last two dimensions are relevant (`seq_length` and `width`), so + # we broadcast among the first dimensions, which is typically just + # the batch size. + position_broadcast_shape = [] + for _ in range(num_dims - 2): + position_broadcast_shape.append(1) + position_broadcast_shape.extend([seq_length, width]) + position_embeddings = tf.reshape(position_embeddings, + position_broadcast_shape) + output += position_embeddings + + output = layer_norm_and_dropout(output, dropout_prob) + return output + + +def create_attention_mask_from_input_mask(from_tensor, to_mask): + """Create 3D attention mask from a 2D tensor mask. + + Args: + from_tensor: 2D or 3D Tensor of shape [batch_size, from_seq_length, ...]. + to_mask: int32 Tensor of shape [batch_size, to_seq_length]. + + Returns: + float Tensor of shape [batch_size, from_seq_length, to_seq_length]. 
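+
+  Example (illustrative): for a batch of one sequence with
+  to_mask = [[1, 1, 0]] and a from_seq_length of 3, the result has shape
+  [1, 3, 3] and every row equals [1.0, 1.0, 0.0], i.e. no position may
+  attend *to* the final (padding) position.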
+ """ + from_shape = get_shape_list(from_tensor, expected_rank=[2, 3]) + batch_size = from_shape[0] + from_seq_length = from_shape[1] + + to_shape = get_shape_list(to_mask, expected_rank=2) + to_seq_length = to_shape[1] + + to_mask = tf.cast( + tf.reshape(to_mask, [batch_size, 1, to_seq_length]), tf.float32) + + # We don't assume that `from_tensor` is a mask (although it could be). We + # don't actually care if we attend *from* padding tokens (only *to* padding) + # tokens so we create a tensor of all ones. + # + # `broadcast_ones` = [batch_size, from_seq_length, 1] + broadcast_ones = tf.ones( + shape=[batch_size, from_seq_length, 1], dtype=tf.float32) + + # Here we broadcast along two dimensions to create the mask. + mask = broadcast_ones * to_mask + + return mask + + +def attention_layer(from_tensor, + to_tensor, + attention_mask=None, + num_attention_heads=1, + size_per_head=512, + query_act=None, + key_act=None, + value_act=None, + attention_probs_dropout_prob=0.0, + initializer_range=0.02, + do_return_2d_tensor=False, + batch_size=None, + from_seq_length=None, + to_seq_length=None): + """Performs multi-headed attention from `from_tensor` to `to_tensor`. + + This is an implementation of multi-headed attention based on "Attention + is all you Need". If `from_tensor` and `to_tensor` are the same, then + this is self-attention. Each timestep in `from_tensor` attends to the + corresponding sequence in `to_tensor`, and returns a fixed-with vector. + + This function first projects `from_tensor` into a "query" tensor and + `to_tensor` into "key" and "value" tensors. These are (effectively) a list + of tensors of length `num_attention_heads`, where each tensor is of shape + [batch_size, seq_length, size_per_head]. + + Then, the query and key tensors are dot-producted and scaled. These are + softmaxed to obtain attention probabilities. The value tensors are then + interpolated by these probabilities, then concatenated back to a single + tensor and returned. + + In practice, the multi-headed attention are done with transposes and + reshapes rather than actual separate tensors. + + Args: + from_tensor: float Tensor of shape [batch_size, from_seq_length, + from_width]. + to_tensor: float Tensor of shape [batch_size, to_seq_length, to_width]. + attention_mask: (optional) int32 Tensor of shape [batch_size, + from_seq_length, to_seq_length]. The values should be 1 or 0. The + attention scores will effectively be set to -infinity for any positions in + the mask that are 0, and will be unchanged for positions that are 1. + num_attention_heads: int. Number of attention heads. + size_per_head: int. Size of each attention head. + query_act: (optional) Activation function for the query transform. + key_act: (optional) Activation function for the key transform. + value_act: (optional) Activation function for the value transform. + attention_probs_dropout_prob: (optional) float. Dropout probability of the + attention probabilities. + initializer_range: float. Range of the weight initializer. + do_return_2d_tensor: bool. If True, the output will be of shape [batch_size + * from_seq_length, num_attention_heads * size_per_head]. If False, the + output will be of shape [batch_size, from_seq_length, num_attention_heads + * size_per_head]. + batch_size: (Optional) int. If the input is 2D, this might be the batch size + of the 3D version of the `from_tensor` and `to_tensor`. + from_seq_length: (Optional) If the input is 2D, this might be the seq length + of the 3D version of the `from_tensor`. 
+ to_seq_length: (Optional) If the input is 2D, this might be the seq length + of the 3D version of the `to_tensor`. + + Returns: + float Tensor of shape [batch_size, from_seq_length, + num_attention_heads * size_per_head]. (If `do_return_2d_tensor` is + true, this will be of shape [batch_size * from_seq_length, + num_attention_heads * size_per_head]). + + Raises: + ValueError: Any of the arguments or tensor shapes are invalid. + """ + + def transpose_for_scores(input_tensor, batch_size, num_attention_heads, + seq_length, width): + output_tensor = tf.reshape( + input_tensor, [batch_size, seq_length, num_attention_heads, width]) + + output_tensor = tf.transpose(output_tensor, [0, 2, 1, 3]) + return output_tensor + + from_shape = get_shape_list(from_tensor, expected_rank=[2, 3]) + to_shape = get_shape_list(to_tensor, expected_rank=[2, 3]) + + if len(from_shape) != len(to_shape): + raise ValueError( + "The rank of `from_tensor` must match the rank of `to_tensor`.") + + if len(from_shape) == 3: + batch_size = from_shape[0] + from_seq_length = from_shape[1] + to_seq_length = to_shape[1] + elif len(from_shape) == 2: + if (batch_size is None or from_seq_length is None or to_seq_length is None): + raise ValueError( + "When passing in rank 2 tensors to attention_layer, the values " + "for `batch_size`, `from_seq_length`, and `to_seq_length` " + "must all be specified.") + + # Scalar dimensions referenced here: + # B = batch size (number of sequences) + # F = `from_tensor` sequence length + # T = `to_tensor` sequence length + # N = `num_attention_heads` + # H = `size_per_head` + + from_tensor_2d = reshape_to_matrix(from_tensor) + to_tensor_2d = reshape_to_matrix(to_tensor) + + # `query_layer` = [B*F, N*H] + query_layer = tf.layers.dense( + from_tensor_2d, + num_attention_heads * size_per_head, + activation=query_act, + name="query", + kernel_initializer=create_initializer(initializer_range)) + + # `key_layer` = [B*T, N*H] + key_layer = tf.layers.dense( + to_tensor_2d, + num_attention_heads * size_per_head, + activation=key_act, + name="key", + kernel_initializer=create_initializer(initializer_range)) + + # `value_layer` = [B*T, N*H] + value_layer = tf.layers.dense( + to_tensor_2d, + num_attention_heads * size_per_head, + activation=value_act, + name="value", + kernel_initializer=create_initializer(initializer_range)) + + # `query_layer` = [B, N, F, H] + query_layer = transpose_for_scores(query_layer, batch_size, + num_attention_heads, from_seq_length, + size_per_head) + + # `key_layer` = [B, N, T, H] + key_layer = transpose_for_scores(key_layer, batch_size, num_attention_heads, + to_seq_length, size_per_head) + + # Take the dot product between "query" and "key" to get the raw + # attention scores. + # `attention_scores` = [B, N, F, T] + attention_scores = tf.matmul(query_layer, key_layer, transpose_b=True) + attention_scores = tf.multiply(attention_scores, + 1.0 / math.sqrt(float(size_per_head))) + + if attention_mask is not None: + # `attention_mask` = [B, 1, F, T] + attention_mask = tf.expand_dims(attention_mask, axis=[1]) + + # Since attention_mask is 1.0 for positions we want to attend and 0.0 for + # masked positions, this operation will create a tensor which is 0.0 for + # positions we want to attend and -10000.0 for masked positions. + adder = (1.0 - tf.cast(attention_mask, tf.float32)) * -10000.0 + + # Since we are adding it to the raw scores before the softmax, this is + # effectively the same as removing these entirely. 
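+    # (Illustrative: a masked position whose raw score is, say, 3.2 becomes
+    # 3.2 - 10000.0 after this addition, which the softmax below maps to a
+    # probability of essentially zero.)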
+ attention_scores += adder + + # Normalize the attention scores to probabilities. + # `attention_probs` = [B, N, F, T] + attention_probs = tf.nn.softmax(attention_scores) + + # This is actually dropping out entire tokens to attend to, which might + # seem a bit unusual, but is taken from the original Transformer paper. + attention_probs = dropout(attention_probs, attention_probs_dropout_prob) + + # `value_layer` = [B, T, N, H] + value_layer = tf.reshape( + value_layer, + [batch_size, to_seq_length, num_attention_heads, size_per_head]) + + # `value_layer` = [B, N, T, H] + value_layer = tf.transpose(value_layer, [0, 2, 1, 3]) + + # `context_layer` = [B, N, F, H] + context_layer = tf.matmul(attention_probs, value_layer) + + # `context_layer` = [B, F, N, H] + context_layer = tf.transpose(context_layer, [0, 2, 1, 3]) + + if do_return_2d_tensor: + # `context_layer` = [B*F, N*H] + context_layer = tf.reshape( + context_layer, + [batch_size * from_seq_length, num_attention_heads * size_per_head]) + else: + # `context_layer` = [B, F, N*H] + context_layer = tf.reshape( + context_layer, + [batch_size, from_seq_length, num_attention_heads * size_per_head]) + + return context_layer + + +def transformer_model(input_tensor, + attention_mask=None, + hidden_size=768, + num_hidden_layers=12, + num_attention_heads=12, + intermediate_size=3072, + intermediate_act_fn=gelu, + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + initializer_range=0.02, + do_return_all_layers=False): + """Multi-headed, multi-layer Transformer from "Attention is All You Need". + + This is almost an exact implementation of the original Transformer encoder. + + See the original paper: + https://arxiv.org/abs/1706.03762 + + Also see: + https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/models/transformer.py + + Args: + input_tensor: float Tensor of shape [batch_size, seq_length, hidden_size]. + attention_mask: (optional) int32 Tensor of shape [batch_size, seq_length, + seq_length], with 1 for positions that can be attended to and 0 in + positions that should not be. + hidden_size: int. Hidden size of the Transformer. + num_hidden_layers: int. Number of layers (blocks) in the Transformer. + num_attention_heads: int. Number of attention heads in the Transformer. + intermediate_size: int. The size of the "intermediate" (a.k.a., feed + forward) layer. + intermediate_act_fn: function. The non-linear activation function to apply + to the output of the intermediate/feed-forward layer. + hidden_dropout_prob: float. Dropout probability for the hidden layers. + attention_probs_dropout_prob: float. Dropout probability of the attention + probabilities. + initializer_range: float. Range of the initializer (stddev of truncated + normal). + do_return_all_layers: Whether to also return all layers or just the final + layer. + + Returns: + float Tensor of shape [batch_size, seq_length, hidden_size], the final + hidden layer of the Transformer. + + Raises: + ValueError: A Tensor shape or parameter is invalid. 
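+
+  Example shapes (illustrative): with the default arguments, an input of shape
+  [batch_size=2, seq_length=128, hidden_size=768] returns a Tensor of the same
+  shape, or a list of num_hidden_layers=12 such Tensors when
+  `do_return_all_layers` is True.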
+ """ + if hidden_size % num_attention_heads != 0: + raise ValueError( + "The hidden size (%d) is not a multiple of the number of attention " + "heads (%d)" % (hidden_size, num_attention_heads)) + + attention_head_size = int(hidden_size / num_attention_heads) + input_shape = get_shape_list(input_tensor, expected_rank=3) + batch_size = input_shape[0] + seq_length = input_shape[1] + input_width = input_shape[2] + + # The Transformer performs sum residuals on all layers so the input needs + # to be the same as the hidden size. + if input_width != hidden_size: + raise ValueError("The width of the input tensor (%d) != hidden size (%d)" % + (input_width, hidden_size)) + + # We keep the representation as a 2D tensor to avoid re-shaping it back and + # forth from a 3D tensor to a 2D tensor. Re-shapes are normally free on + # the GPU/CPU but may not be free on the TPU, so we want to minimize them to + # help the optimizer. + prev_output = reshape_to_matrix(input_tensor) + + all_layer_outputs = [] + for layer_idx in range(num_hidden_layers): + with tf.variable_scope("layer_%d" % layer_idx): + layer_input = prev_output + + with tf.variable_scope("attention"): + attention_heads = [] + with tf.variable_scope("self"): + attention_head = attention_layer( + from_tensor=layer_input, + to_tensor=layer_input, + attention_mask=attention_mask, + num_attention_heads=num_attention_heads, + size_per_head=attention_head_size, + attention_probs_dropout_prob=attention_probs_dropout_prob, + initializer_range=initializer_range, + do_return_2d_tensor=True, + batch_size=batch_size, + from_seq_length=seq_length, + to_seq_length=seq_length) + attention_heads.append(attention_head) + + attention_output = None + if len(attention_heads) == 1: + attention_output = attention_heads[0] + else: + # In the case where we have other sequences, we just concatenate + # them to the self-attention head before the projection. + attention_output = tf.concat(attention_heads, axis=-1) + + # Run a linear projection of `hidden_size` then add a residual + # with `layer_input`. + with tf.variable_scope("output"): + attention_output = tf.layers.dense( + attention_output, + hidden_size, + kernel_initializer=create_initializer(initializer_range)) + attention_output = dropout(attention_output, hidden_dropout_prob) + attention_output = layer_norm(attention_output + layer_input) + + # The activation is only applied to the "intermediate" hidden layer. + with tf.variable_scope("intermediate"): + intermediate_output = tf.layers.dense( + attention_output, + intermediate_size, + activation=intermediate_act_fn, + kernel_initializer=create_initializer(initializer_range)) + + # Down-project back to `hidden_size` then add the residual. + with tf.variable_scope("output"): + layer_output = tf.layers.dense( + intermediate_output, + hidden_size, + kernel_initializer=create_initializer(initializer_range)) + layer_output = dropout(layer_output, hidden_dropout_prob) + layer_output = layer_norm(layer_output + attention_output) + prev_output = layer_output + all_layer_outputs.append(layer_output) + + if do_return_all_layers: + final_outputs = [] + for layer_output in all_layer_outputs: + final_output = reshape_from_matrix(layer_output, input_shape) + final_outputs.append(final_output) + return final_outputs + else: + final_output = reshape_from_matrix(prev_output, input_shape) + return final_output + + +def get_shape_list(tensor, expected_rank=None, name=None): + """Returns a list of the shape of tensor, preferring static dimensions. 
+ + Args: + tensor: A tf.Tensor object to find the shape of. + expected_rank: (optional) int. The expected rank of `tensor`. If this is + specified and the `tensor` has a different rank, and exception will be + thrown. + name: Optional name of the tensor for the error message. + + Returns: + A list of dimensions of the shape of tensor. All static dimensions will + be returned as python integers, and dynamic dimensions will be returned + as tf.Tensor scalars. + """ + if name is None: + name = tensor.name + + if expected_rank is not None: + assert_rank(tensor, expected_rank, name) + + shape = tensor.shape.as_list() + + non_static_indexes = [] + for (index, dim) in enumerate(shape): + if dim is None: + non_static_indexes.append(index) + + if not non_static_indexes: + return shape + + dyn_shape = tf.shape(tensor) + for index in non_static_indexes: + shape[index] = dyn_shape[index] + return shape + + +def reshape_to_matrix(input_tensor): + """Reshapes a >= rank 2 tensor to a rank 2 tensor (i.e., a matrix).""" + ndims = input_tensor.shape.ndims + if ndims < 2: + raise ValueError("Input tensor must have at least rank 2. Shape = %s" % + (input_tensor.shape)) + if ndims == 2: + return input_tensor + + width = input_tensor.shape[-1] + output_tensor = tf.reshape(input_tensor, [-1, width]) + return output_tensor + + +def reshape_from_matrix(output_tensor, orig_shape_list): + """Reshapes a rank 2 tensor back to its original rank >= 2 tensor.""" + if len(orig_shape_list) == 2: + return output_tensor + + output_shape = get_shape_list(output_tensor) + + orig_dims = orig_shape_list[0:-1] + width = output_shape[-1] + + return tf.reshape(output_tensor, orig_dims + [width]) + + +def assert_rank(tensor, expected_rank, name=None): + """Raises an exception if the tensor rank is not of the expected rank. + + Args: + tensor: A tf.Tensor to check the rank of. + expected_rank: Python integer or list of integers, expected rank. + name: Optional name of the tensor for the error message. + + Raises: + ValueError: If the expected shape doesn't match the actual shape. + """ + if name is None: + name = tensor.name + + expected_rank_dict = {} + if isinstance(expected_rank, six.integer_types): + expected_rank_dict[expected_rank] = True + else: + for x in expected_rank: + expected_rank_dict[x] = True + + actual_rank = tensor.shape.ndims + if actual_rank not in expected_rank_dict: + scope_name = tf.get_variable_scope().name + raise ValueError( + "For the tensor `%s` in scope `%s`, the actual rank " + "`%d` (shape = %s) is not equal to the expected rank `%s`" % + (name, scope_name, actual_rank, str(tensor.shape), str(expected_rank))) diff --git a/modeling_test.py b/modeling_test.py new file mode 100644 index 0000000..817ad2d --- /dev/null +++ b/modeling_test.py @@ -0,0 +1,277 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
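+
+# Smoke tests for modeling.py: BertModelTester builds a small BertModel
+# (hidden_size=32, 5 layers, vocab_size=99) on random int32 ids, checks the
+# shapes of the embedding/sequence/pooled outputs, verifies that every op in
+# the graph is reachable from those outputs, and checks BertConfig's JSON
+# serialization.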
+from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import json +import random +import re + +import modeling +import six +import tensorflow as tf + + +class BertModelTest(tf.test.TestCase): + + class BertModelTester(object): + + def __init__(self, + parent, + batch_size=13, + seq_length=7, + is_training=True, + use_input_mask=True, + use_token_type_ids=True, + vocab_size=99, + hidden_size=32, + num_hidden_layers=5, + num_attention_heads=4, + intermediate_size=37, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=16, + initializer_range=0.02, + scope=None): + self.parent = parent + self.batch_size = batch_size + self.seq_length = seq_length + self.is_training = is_training + self.use_input_mask = use_input_mask + self.use_token_type_ids = use_token_type_ids + self.vocab_size = vocab_size + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.intermediate_size = intermediate_size + self.hidden_act = hidden_act + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.max_position_embeddings = max_position_embeddings + self.type_vocab_size = type_vocab_size + self.initializer_range = initializer_range + self.scope = scope + + def create_model(self): + input_ids = BertModelTest.ids_tensor([self.batch_size, self.seq_length], + self.vocab_size) + + input_mask = None + if self.use_input_mask: + input_mask = BertModelTest.ids_tensor( + [self.batch_size, self.seq_length], vocab_size=2) + + token_type_ids = None + if self.use_token_type_ids: + token_type_ids = BertModelTest.ids_tensor( + [self.batch_size, self.seq_length], self.type_vocab_size) + + config = modeling.BertConfig( + vocab_size=self.vocab_size, + hidden_size=self.hidden_size, + num_hidden_layers=self.num_hidden_layers, + num_attention_heads=self.num_attention_heads, + intermediate_size=self.intermediate_size, + hidden_act=self.hidden_act, + hidden_dropout_prob=self.hidden_dropout_prob, + attention_probs_dropout_prob=self.attention_probs_dropout_prob, + max_position_embeddings=self.max_position_embeddings, + type_vocab_size=self.type_vocab_size, + initializer_range=self.initializer_range) + + model = modeling.BertModel( + config=config, + is_training=self.is_training, + input_ids=input_ids, + input_mask=input_mask, + token_type_ids=token_type_ids, + scope=self.scope) + + outputs = { + "embedding_output": model.get_embedding_output(), + "sequence_output": model.get_sequence_output(), + "pooled_output": model.get_pooled_output(), + "all_encoder_layers": model.get_all_encoder_layers(), + } + return outputs + + def check_output(self, result): + self.parent.assertAllEqual( + result["embedding_output"].shape, + [self.batch_size, self.seq_length, self.hidden_size]) + + self.parent.assertAllEqual( + result["sequence_output"].shape, + [self.batch_size, self.seq_length, self.hidden_size]) + + self.parent.assertAllEqual(result["pooled_output"].shape, + [self.batch_size, self.hidden_size]) + + def test_default(self): + self.run_tester(BertModelTest.BertModelTester(self)) + + def test_config_to_json_string(self): + config = modeling.BertConfig(vocab_size=99, hidden_size=37) + obj = json.loads(config.to_json_string()) + self.assertEqual(obj["vocab_size"], 99) + self.assertEqual(obj["hidden_size"], 37) + + def run_tester(self, tester): + with 
self.test_session() as sess: + ops = tester.create_model() + init_op = tf.group(tf.global_variables_initializer(), + tf.local_variables_initializer()) + sess.run(init_op) + output_result = sess.run(ops) + tester.check_output(output_result) + + self.assert_all_tensors_reachable(sess, [init_op, ops]) + + @classmethod + def ids_tensor(cls, shape, vocab_size, rng=None, name=None): + """Creates a random int32 tensor of the shape within the vocab size.""" + if rng is None: + rng = random.Random() + + total_dims = 1 + for dim in shape: + total_dims *= dim + + values = [] + for _ in range(total_dims): + values.append(rng.randint(0, vocab_size - 1)) + + return tf.constant(value=values, dtype=tf.int32, shape=shape, name=name) + + def assert_all_tensors_reachable(self, sess, outputs): + """Checks that all the tensors in the graph are reachable from outputs.""" + graph = sess.graph + + ignore_strings = [ + "^.*/assert_less_equal/.*$", + "^.*/dilation_rate$", + "^.*/Tensordot/concat$", + "^.*/Tensordot/concat/axis$", + "^testing/.*$", + ] + + ignore_regexes = [re.compile(x) for x in ignore_strings] + + unreachable = self.get_unreachable_ops(graph, outputs) + filtered_unreachable = [] + for x in unreachable: + do_ignore = False + for r in ignore_regexes: + m = r.match(x.name) + if m is not None: + do_ignore = True + if do_ignore: + continue + filtered_unreachable.append(x) + unreachable = filtered_unreachable + + self.assertEqual( + len(unreachable), 0, "The following ops are unreachable: %s" % + (" ".join([x.name for x in unreachable]))) + + @classmethod + def get_unreachable_ops(cls, graph, outputs): + """Finds all of the tensors in graph that are unreachable from outputs.""" + outputs = cls.flatten_recursive(outputs) + output_to_op = collections.defaultdict(list) + op_to_all = collections.defaultdict(list) + assign_out_to_in = collections.defaultdict(list) + + for op in graph.get_operations(): + for x in op.inputs: + op_to_all[op.name].append(x.name) + for y in op.outputs: + output_to_op[y.name].append(op.name) + op_to_all[op.name].append(y.name) + if str(op.type) == "Assign": + for y in op.outputs: + for x in op.inputs: + assign_out_to_in[y.name].append(x.name) + + assign_groups = collections.defaultdict(list) + for out_name in assign_out_to_in.keys(): + name_group = assign_out_to_in[out_name] + for n1 in name_group: + assign_groups[n1].append(out_name) + for n2 in name_group: + if n1 != n2: + assign_groups[n1].append(n2) + + seen_tensors = {} + stack = [x.name for x in outputs] + while stack: + name = stack.pop() + if name in seen_tensors: + continue + seen_tensors[name] = True + + if name in output_to_op: + for op_name in output_to_op[name]: + if op_name in op_to_all: + for input_name in op_to_all[op_name]: + if input_name not in stack: + stack.append(input_name) + + expanded_names = [] + if name in assign_groups: + for assign_name in assign_groups[name]: + expanded_names.append(assign_name) + + for expanded_name in expanded_names: + if expanded_name not in stack: + stack.append(expanded_name) + + unreachable_ops = [] + for op in graph.get_operations(): + is_unreachable = False + all_names = [x.name for x in op.inputs] + [x.name for x in op.outputs] + for name in all_names: + if name not in seen_tensors: + is_unreachable = True + if is_unreachable: + unreachable_ops.append(op) + return unreachable_ops + + @classmethod + def flatten_recursive(cls, item): + """Flattens (potentially nested) a tuple/dictionary/list to a list.""" + output = [] + if isinstance(item, list): + output.extend(item) + elif 
isinstance(item, tuple): + output.extend(list(item)) + elif isinstance(item, dict): + for (_, v) in six.iteritems(item): + output.append(v) + else: + return [item] + + flat_output = [] + for x in output: + flat_output.extend(cls.flatten_recursive(x)) + return flat_output + + +if __name__ == "__main__": + tf.test.main() diff --git a/optimization.py b/optimization.py new file mode 100644 index 0000000..d33dabd --- /dev/null +++ b/optimization.py @@ -0,0 +1,174 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Functions and classes related to optimization (weight updates).""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import re +import tensorflow as tf + + +def create_optimizer(loss, init_lr, num_train_steps, num_warmup_steps, use_tpu): + """Creates an optimizer training op.""" + global_step = tf.train.get_or_create_global_step() + + learning_rate = tf.constant(value=init_lr, shape=[], dtype=tf.float32) + + # Implements linear decay of the learning rate. + learning_rate = tf.train.polynomial_decay( + learning_rate, + global_step, + num_train_steps, + end_learning_rate=0.0, + power=1.0, + cycle=False) + + # Implements linear warmup. I.e., if global_step < num_warmup_steps, the + # learning rate will be `global_step/num_warmup_steps * init_lr`. + if num_warmup_steps: + global_steps_int = tf.cast(global_step, tf.int32) + warmup_steps_int = tf.constant(num_warmup_steps, dtype=tf.int32) + + global_steps_float = tf.cast(global_steps_int, tf.float32) + warmup_steps_float = tf.cast(warmup_steps_int, tf.float32) + + warmup_percent_done = global_steps_float / warmup_steps_float + warmup_learning_rate = init_lr * warmup_percent_done + + is_warmup = tf.cast(global_steps_int < warmup_steps_int, tf.float32) + learning_rate = ( + (1.0 - is_warmup) * learning_rate + is_warmup * warmup_learning_rate) + + # It is recommended that you use this optimizer for fine tuning, since this + # is how the model was trained (note that the Adam m/v variables are NOT + # loaded from init_checkpoint.) + optimizer = AdamWeightDecayOptimizer( + learning_rate=learning_rate, + weight_decay_rate=0.01, + beta_1=0.9, + beta_2=0.999, + epsilon=1e-6, + exclude_from_weight_decay=["LayerNorm", "layer_norm", "bias"]) + + if use_tpu: + optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer) + + tvars = tf.trainable_variables() + grads = tf.gradients(loss, tvars) + + # This is how the model was pre-trained. + (grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0) + + train_op = optimizer.apply_gradients( + zip(grads, tvars), global_step=global_step) + + # Normally the global step update is done inside of `apply_gradients`. + # However, `AdamWeightDecayOptimizer` doesn't do this. But if you use + # a different optimizer, you should probably take this line out. 
+ new_global_step = global_step + 1 + train_op = tf.group(train_op, [global_step.assign(new_global_step)]) + return train_op + + +class AdamWeightDecayOptimizer(tf.train.Optimizer): + """A basic Adam optimizer that includes "correct" L2 weight decay.""" + + def __init__(self, + learning_rate, + weight_decay_rate=0.0, + beta_1=0.9, + beta_2=0.999, + epsilon=1e-6, + exclude_from_weight_decay=None, + name="AdamWeightDecayOptimizer"): + """Constructs a AdamWeightDecayOptimizer.""" + super(AdamWeightDecayOptimizer, self).__init__(False, name) + + self.learning_rate = learning_rate + self.weight_decay_rate = weight_decay_rate + self.beta_1 = beta_1 + self.beta_2 = beta_2 + self.epsilon = epsilon + self.exclude_from_weight_decay = exclude_from_weight_decay + + def apply_gradients(self, grads_and_vars, global_step=None, name=None): + """See base class.""" + assignments = [] + for (grad, param) in grads_and_vars: + if grad is None or param is None: + continue + + param_name = self._get_variable_name(param.name) + + m = tf.get_variable( + name=param_name + "/adam_m", + shape=param.shape.as_list(), + dtype=tf.float32, + trainable=False, + initializer=tf.zeros_initializer()) + v = tf.get_variable( + name=param_name + "/adam_v", + shape=param.shape.as_list(), + dtype=tf.float32, + trainable=False, + initializer=tf.zeros_initializer()) + + # Standard Adam update. + next_m = ( + tf.multiply(self.beta_1, m) + tf.multiply(1.0 - self.beta_1, grad)) + next_v = ( + tf.multiply(self.beta_2, v) + tf.multiply(1.0 - self.beta_2, + tf.square(grad))) + + update = next_m / (tf.sqrt(next_v) + self.epsilon) + + # Just adding the square of the weights to the loss function is *not* + # the correct way of using L2 regularization/weight decay with Adam, + # since that will interact with the m and v parameters in strange ways. + # + # Instead we want ot decay the weights in a manner that doesn't interact + # with the m/v parameters. This is equivalent to adding the square + # of the weights to the loss with plain (non-momentum) SGD. + if self._do_use_weight_decay(param_name): + update += self.weight_decay_rate * param + + update_with_lr = self.learning_rate * update + + next_param = param - update_with_lr + + assignments.extend( + [param.assign(next_param), + m.assign(next_m), + v.assign(next_v)]) + return tf.group(*assignments, name=name) + + def _do_use_weight_decay(self, param_name): + """Whether to use L2 weight decay for `param_name`.""" + if not self.weight_decay_rate: + return False + if self.exclude_from_weight_decay: + for r in self.exclude_from_weight_decay: + if re.search(r, param_name) is not None: + return False + return True + + def _get_variable_name(self, param_name): + """Get the variable name from the tensor name.""" + m = re.match("^(.*):\\d+$", param_name) + if m is not None: + param_name = m.group(1) + return param_name diff --git a/optimization_test.py b/optimization_test.py new file mode 100644 index 0000000..4f2dcf1 --- /dev/null +++ b/optimization_test.py @@ -0,0 +1,48 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import optimization +import tensorflow as tf + + +class OptimizationTest(tf.test.TestCase): + + def test_adam(self): + with self.test_session() as sess: + w = tf.get_variable( + "w", + shape=[3], + initializer=tf.constant_initializer([0.1, -0.2, -0.1])) + x = tf.constant([0.4, 0.2, -0.5]) + loss = tf.reduce_mean(tf.square(x - w)) + tvars = tf.trainable_variables() + grads = tf.gradients(loss, tvars) + global_step = tf.train.get_or_create_global_step() + optimizer = optimization.AdamWeightDecayOptimizer(learning_rate=0.2) + train_op = optimizer.apply_gradients(zip(grads, tvars), global_step) + init_op = tf.group(tf.global_variables_initializer(), + tf.local_variables_initializer()) + sess.run(init_op) + for _ in range(100): + sess.run(train_op) + w_np = sess.run(w) + self.assertAllClose(w_np.flat, [0.4, 0.2, -0.5], rtol=1e-2, atol=1e-2) + + +if __name__ == "__main__": + tf.test.main() diff --git a/predicting_movie_reviews_with_bert_on_tf_hub.ipynb b/predicting_movie_reviews_with_bert_on_tf_hub.ipynb new file mode 100644 index 0000000..466857f --- /dev/null +++ b/predicting_movie_reviews_with_bert_on_tf_hub.ipynb @@ -0,0 +1,1231 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "Predicting Movie Reviews with BERT on TF Hub.ipynb", + "version": "0.3.2", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "metadata": { + "id": "j0a4mTk9o1Qg", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "# Copyright 2019 Google Inc.\n", + "\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "dCpvgG0vwXAZ", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#Predicting Movie Review Sentiment with BERT on TF Hub" + ] + }, + { + "metadata": { + "id": "xiYrZKaHwV81", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "If you’ve been following Natural Language Processing over the past year, you’ve probably heard of BERT: Bidirectional Encoder Representations from Transformers. It’s a neural network architecture designed by Google researchers that’s totally transformed what’s state-of-the-art for NLP tasks, like text classification, translation, summarization, and question answering.\n", + "\n", + "Now that BERT's been added to [TF Hub](https://www.tensorflow.org/hub) as a loadable module, it's easy(ish) to add into existing Tensorflow text pipelines. In an existing pipeline, BERT can replace text embedding layers like ELMO and GloVE. 
Alternatively, [finetuning](http://wiki.fast.ai/index.php/Fine_tuning) BERT can provide both an accuracy boost and faster training time in many cases.\n", + "\n", + "Here, we'll train a model to predict whether an IMDB movie review is positive or negative using BERT in Tensorflow with tf hub. Some code was adapted from [this colab notebook](https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb). Let's get started!" + ] + }, + { + "metadata": { + "id": "hsZvic2YxnTz", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "from sklearn.model_selection import train_test_split\n", + "import pandas as pd\n", + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "from datetime import datetime" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "cp5wfXDx5SPH", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "In addition to the standard libraries we imported above, we'll need to install BERT's python package." + ] + }, + { + "metadata": { + "id": "jviywGyWyKsA", + "colab_type": "code", + "outputId": "166f3005-d219-404f-b201-2a0b75480360", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51 + } + }, + "cell_type": "code", + "source": [ + "!pip install bert-tensorflow" + ], + "execution_count": 38, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Requirement already satisfied: bert-tensorflow in /usr/local/lib/python3.6/dist-packages (1.0.1)\n", + "Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from bert-tensorflow) (1.11.0)\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "hhbGEfwgdEtw", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "import bert\n", + "from bert import run_classifier\n", + "from bert import optimization\n", + "from bert import tokenization" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "KVB3eOcjxxm1", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "Below, we'll set an output directory location to store our model output and checkpoints. This can be a local directory, in which case you'd set OUTPUT_DIR to the name of the directory you'd like to create. If you're running this code in Google's hosted Colab, the directory won't persist after the Colab session ends.\n", + "\n", + "Alternatively, if you're a GCP user, you can store output in a GCP bucket. To do that, set a directory name in OUTPUT_DIR and the name of the GCP bucket in the BUCKET field.\n", + "\n", + "Set DO_DELETE to rewrite the OUTPUT_DIR if it exists. Otherwise, Tensorflow will load existing model checkpoints from that directory (if they exist)." 
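For a purely local run (no GCP bucket), the cell below can be reduced to a minimal form like the sketch here; 'bert_imdb_output' is an assumed folder name, not one used in the notebook, and the TensorFlow import from the earlier cell is assumed:

OUTPUT_DIR = 'bert_imdb_output'  # assumed local folder name
DO_DELETE = False

if DO_DELETE:
    try:
        tf.gfile.DeleteRecursively(OUTPUT_DIR)
    except tf.errors.OpError:
        pass  # fine if the directory didn't exist yet
tf.gfile.MakeDirs(OUTPUT_DIR)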
+ ] + }, + { + "metadata": { + "id": "US_EAnICvP7f", + "colab_type": "code", + "outputId": "7780a032-31d4-4794-e6aa-664a5d2ae7dd", + "cellView": "form", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + } + }, + "cell_type": "code", + "source": [ + "# Set the output directory for saving model file\n", + "# Optionally, set a GCP bucket location\n", + "\n", + "OUTPUT_DIR = 'OUTPUT_DIR_NAME'#@param {type:\"string\"}\n", + "#@markdown Whether or not to clear/delete the directory and create a new one\n", + "DO_DELETE = False #@param {type:\"boolean\"}\n", + "#@markdown Set USE_BUCKET and BUCKET if you want to (optionally) store model output on GCP bucket.\n", + "USE_BUCKET = True #@param {type:\"boolean\"}\n", + "BUCKET = 'BUCKET_NAME' #@param {type:\"string\"}\n", + "\n", + "if USE_BUCKET:\n", + " OUTPUT_DIR = 'gs://{}/{}'.format(BUCKET, OUTPUT_DIR)\n", + " from google.colab import auth\n", + " auth.authenticate_user()\n", + "\n", + "if DO_DELETE:\n", + " try:\n", + " tf.gfile.DeleteRecursively(OUTPUT_DIR)\n", + " except:\n", + " # Doesn't matter if the directory didn't exist\n", + " pass\n", + "tf.gfile.MakeDirs(OUTPUT_DIR)\n", + "print('***** Model output directory: {} *****'.format(OUTPUT_DIR))\n" + ], + "execution_count": 40, + "outputs": [ + { + "output_type": "stream", + "text": [ + "***** Model output directory: gs://bert-tfhub/aclImdb_v1 *****\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "pmFYvkylMwXn", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#Data" + ] + }, + { + "metadata": { + "id": "MC_w8SRqN0fr", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "First, let's download the dataset, hosted by Stanford. The code below, which downloads, extracts, and imports the IMDB Large Movie Review Dataset, is borrowed from [this Tensorflow tutorial](https://www.tensorflow.org/hub/tutorials/text_classification_with_tf_hub)." 
+ ] + }, + { + "metadata": { + "id": "fom_ff20gyy6", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "from tensorflow import keras\n", + "import os\n", + "import re\n", + "\n", + "# Load all files from a directory in a DataFrame.\n", + "def load_directory_data(directory):\n", + " data = {}\n", + " data[\"sentence\"] = []\n", + " data[\"sentiment\"] = []\n", + " for file_path in os.listdir(directory):\n", + " with tf.gfile.GFile(os.path.join(directory, file_path), \"r\") as f:\n", + " data[\"sentence\"].append(f.read())\n", + " data[\"sentiment\"].append(re.match(\"\\d+_(\\d+)\\.txt\", file_path).group(1))\n", + " return pd.DataFrame.from_dict(data)\n", + "\n", + "# Merge positive and negative examples, add a polarity column and shuffle.\n", + "def load_dataset(directory):\n", + " pos_df = load_directory_data(os.path.join(directory, \"pos\"))\n", + " neg_df = load_directory_data(os.path.join(directory, \"neg\"))\n", + " pos_df[\"polarity\"] = 1\n", + " neg_df[\"polarity\"] = 0\n", + " return pd.concat([pos_df, neg_df]).sample(frac=1).reset_index(drop=True)\n", + "\n", + "# Download and process the dataset files.\n", + "def download_and_load_datasets(force_download=False):\n", + " dataset = tf.keras.utils.get_file(\n", + " fname=\"aclImdb.tar.gz\", \n", + " origin=\"http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz\", \n", + " extract=True)\n", + " \n", + " train_df = load_dataset(os.path.join(os.path.dirname(dataset), \n", + " \"aclImdb\", \"train\"))\n", + " test_df = load_dataset(os.path.join(os.path.dirname(dataset), \n", + " \"aclImdb\", \"test\"))\n", + " \n", + " return train_df, test_df\n" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "2abfwdn-g135", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "train, test = download_and_load_datasets()" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "XA8WHJgzhIZf", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "To keep training fast, we'll take a sample of 5000 train and test examples, respectively." + ] + }, + { + "metadata": { + "id": "lw_F488eixTV", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "train = train.sample(5000)\n", + "test = test.sample(5000)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "prRQM8pDi8xI", + "colab_type": "code", + "outputId": "34445cb8-2be0-4379-fdbc-7794091f6049", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + } + }, + "cell_type": "code", + "source": [ + "train.columns" + ], + "execution_count": 44, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Index(['sentence', 'sentiment', 'polarity'], dtype='object')" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 44 + } + ] + }, + { + "metadata": { + "id": "sfRnHSz3iSXz", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "For us, our input data is the 'sentence' column and our label is the 'polarity' column (0, 1 for negative and positive, respecitvely)" + ] + }, + { + "metadata": { + "id": "IuMOGwFui4it", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "DATA_COLUMN = 'sentence'\n", + "LABEL_COLUMN = 'polarity'\n", + "# label_list is the list of labels, i.e. 
True, False or 0, 1 or 'dog', 'cat'\n", + "label_list = [0, 1]" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "V399W0rqNJ-Z", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#Data Preprocessing\n", + "We'll need to transform our data into a format BERT understands. This involves two steps. First, we create `InputExample`'s using the constructor provided in the BERT library.\n", + "\n", + "- `text_a` is the text we want to classify, which in this case, is the `Request` field in our Dataframe. \n", + "- `text_b` is used if we're training a model to understand the relationship between sentences (i.e. is `text_b` a translation of `text_a`? Is `text_b` an answer to the question asked by `text_a`?). This doesn't apply to our task, so we can leave `text_b` blank.\n", + "- `label` is the label for our example, i.e. True, False" + ] + }, + { + "metadata": { + "id": "p9gEt5SmM6i6", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "# Use the InputExample class from BERT's run_classifier code to create examples from the data\n", + "train_InputExamples = train.apply(lambda x: bert.run_classifier.InputExample(guid=None, # Globally unique ID for bookkeeping, unused in this example\n", + " text_a = x[DATA_COLUMN], \n", + " text_b = None, \n", + " label = x[LABEL_COLUMN]), axis = 1)\n", + "\n", + "test_InputExamples = test.apply(lambda x: bert.run_classifier.InputExample(guid=None, \n", + " text_a = x[DATA_COLUMN], \n", + " text_b = None, \n", + " label = x[LABEL_COLUMN]), axis = 1)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "SCZWZtKxObjh", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "Next, we need to preprocess our data so that it matches the data BERT was trained on. For this, we'll need to do a couple of things (but don't worry--this is also included in the Python library):\n", + "\n", + "\n", + "1. Lowercase our text (if we're using a BERT lowercase model)\n", + "2. Tokenize it (i.e. \"sally says hi\" -> [\"sally\", \"says\", \"hi\"])\n", + "3. Break words into WordPieces (i.e. \"calling\" -> [\"call\", \"##ing\"])\n", + "4. Map our words to indexes using a vocab file that BERT provides\n", + "5. Add special \"CLS\" and \"SEP\" tokens (see the [readme](https://github.com/google-research/bert))\n", + "6. 
Append \"index\" and \"segment\" tokens to each input (see the [BERT paper](https://arxiv.org/pdf/1810.04805.pdf))\n", + "\n", + "Happily, we don't have to worry about most of these details.\n", + "\n", + "\n" + ] + }, + { + "metadata": { + "id": "qMWiDtpyQSoU", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "To start, we'll need to load a vocabulary file and lowercasing information directly from the BERT tf hub module:" + ] + }, + { + "metadata": { + "id": "IhJSe0QHNG7U", + "colab_type": "code", + "outputId": "20b28cc7-3cb3-4ce6-bfff-a7847ce3bbaa", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + } + }, + "cell_type": "code", + "source": [ + "# This is a path to an uncased (all lowercase) version of BERT\n", + "BERT_MODEL_HUB = \"https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1\"\n", + "\n", + "def create_tokenizer_from_hub_module():\n", + " \"\"\"Get the vocab file and casing info from the Hub module.\"\"\"\n", + " with tf.Graph().as_default():\n", + " bert_module = hub.Module(BERT_MODEL_HUB)\n", + " tokenization_info = bert_module(signature=\"tokenization_info\", as_dict=True)\n", + " with tf.Session() as sess:\n", + " vocab_file, do_lower_case = sess.run([tokenization_info[\"vocab_file\"],\n", + " tokenization_info[\"do_lower_case\"]])\n", + " \n", + " return bert.tokenization.FullTokenizer(\n", + " vocab_file=vocab_file, do_lower_case=do_lower_case)\n", + "\n", + "tokenizer = create_tokenizer_from_hub_module()" + ], + "execution_count": 47, + "outputs": [ + { + "output_type": "stream", + "text": [ + "INFO:tensorflow:Saver not created because there are no variables in the graph to restore\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "z4oFkhpZBDKm", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "Great--we just learned that the BERT model we're using expects lowercase data (that's what stored in tokenization_info[\"do_lower_case\"]) and we also loaded BERT's vocab file. We also created a tokenizer, which breaks words into word pieces:" + ] + }, + { + "metadata": { + "id": "dsBo6RCtQmwx", + "colab_type": "code", + "outputId": "9af8c917-90ec-4fe9-897b-79dc89ca88e1", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 221 + } + }, + "cell_type": "code", + "source": [ + "tokenizer.tokenize(\"This here's an example of using the BERT tokenizer\")" + ], + "execution_count": 48, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['this',\n", + " 'here',\n", + " \"'\",\n", + " 's',\n", + " 'an',\n", + " 'example',\n", + " 'of',\n", + " 'using',\n", + " 'the',\n", + " 'bert',\n", + " 'token',\n", + " '##izer']" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 48 + } + ] + }, + { + "metadata": { + "id": "0OEzfFIt6GIc", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "Using our tokenizer, we'll call `run_classifier.convert_examples_to_features` on our InputExamples to convert them into features BERT understands." 
+ ] + }, + { + "metadata": { + "id": "LL5W8gEGRTAf", + "colab_type": "code", + "outputId": "65001dda-155b-48fc-b5fc-1e4cabc8dfbf", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1261 + } + }, + "cell_type": "code", + "source": [ + "# We'll set sequences to be at most 128 tokens long.\n", + "MAX_SEQ_LENGTH = 128\n", + "# Convert our train and test features to InputFeatures that BERT understands.\n", + "train_features = bert.run_classifier.convert_examples_to_features(train_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)\n", + "test_features = bert.run_classifier.convert_examples_to_features(test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)" + ], + "execution_count": 49, + "outputs": [ + { + "output_type": "stream", + "text": [ + "INFO:tensorflow:Writing example 0 of 5000\n", + "INFO:tensorflow:*** Example ***\n", + "INFO:tensorflow:guid: None\n", + "INFO:tensorflow:tokens: [CLS] i ' m watching this on the sci - fi channel right now . it ' s so horrible i can ' t stop watching it ! i ' m a video ##grapher and this movie makes me sad . i feel bad for anyone associated with this movie . some of the camera work is good . most is very questionable . there are a few decent actors in the flick . too bad they ' re surrounded by what must have been the director ' s relatives . that ' s the only way they could have been qualified to be in a movie ! music was a little better than the acting . if you get around to watching this i hope it [SEP]\n", + "INFO:tensorflow:input_ids: 101 1045 1005 1049 3666 2023 2006 1996 16596 1011 10882 3149 2157 2085 1012 2009 1005 1055 2061 9202 1045 2064 1005 1056 2644 3666 2009 999 1045 1005 1049 1037 2678 18657 1998 2023 3185 3084 2033 6517 1012 1045 2514 2919 2005 3087 3378 2007 2023 3185 1012 2070 1997 1996 4950 2147 2003 2204 1012 2087 2003 2200 21068 1012 2045 2024 1037 2261 11519 5889 1999 1996 17312 1012 2205 2919 2027 1005 2128 5129 2011 2054 2442 2031 2042 1996 2472 1005 1055 9064 1012 2008 1005 1055 1996 2069 2126 2027 2071 2031 2042 4591 2000 2022 1999 1037 3185 999 2189 2001 1037 2210 2488 2084 1996 3772 1012 2065 2017 2131 2105 2000 3666 2023 1045 3246 2009 102\n", + "INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n", + "INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "INFO:tensorflow:label: 0 (id = 0)\n", + "INFO:tensorflow:*** Example ***\n", + "INFO:tensorflow:guid: None\n", + "INFO:tensorflow:tokens: [CLS] i have been a fan of pushing dai ##sies since the very beginning . it is wonderful ##ly thought up , and bryan fuller has the most remarkable ideas for this show . < br / > < br / > it is unbelievable on how much tv has been needing a creative , original show like pushing dai ##sies . it is a huge relief to see a show , that is unlike the rest , where as , if you compared it to some of the newer shows , such as scrub ##s and house , you would see the similarities , and it does get ted ##ious at moments to see shows so close in identity . 
< br / > < br [SEP]\n", + "INFO:tensorflow:input_ids: 101 1045 2031 2042 1037 5470 1997 6183 18765 14625 2144 1996 2200 2927 1012 2009 2003 6919 2135 2245 2039 1010 1998 8527 12548 2038 1996 2087 9487 4784 2005 2023 2265 1012 1026 7987 1013 1028 1026 7987 1013 1028 2009 2003 23653 2006 2129 2172 2694 2038 2042 11303 1037 5541 1010 2434 2265 2066 6183 18765 14625 1012 2009 2003 1037 4121 4335 2000 2156 1037 2265 1010 2008 2003 4406 1996 2717 1010 2073 2004 1010 2065 2017 4102 2009 2000 2070 1997 1996 10947 3065 1010 2107 2004 18157 2015 1998 2160 1010 2017 2052 2156 1996 12319 1010 1998 2009 2515 2131 6945 6313 2012 5312 2000 2156 3065 2061 2485 1999 4767 1012 1026 7987 1013 1028 1026 7987 102\n", + "INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n", + "INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "INFO:tensorflow:label: 1 (id = 1)\n", + "INFO:tensorflow:*** Example ***\n", + "INFO:tensorflow:guid: None\n", + "INFO:tensorflow:tokens: [CLS] this movie starts out promising ##ly , with an early scene in which frank morgan advises against gary cooper ' s marriage to his daughter , anita louise . frank morgan , playing an una ##bas ##hed gold - digger , loudly complain ##s to cooper about his perceived pen ##ury at the hands of his family - including his daughter , anita louise . i am a fan of all 3 actors . frank morgan is ( to my mind ) a hollywood treasure , cooper a legend , and louise a very lovely , versatile and under - appreciated actress seldom seen in the leading role . i also have nothing against teresa wright , and while not blessed with great range , she [SEP]\n", + "INFO:tensorflow:input_ids: 101 2023 3185 4627 2041 10015 2135 1010 2007 2019 2220 3496 1999 2029 3581 5253 25453 2114 5639 6201 1005 1055 3510 2000 2010 2684 1010 12918 8227 1012 3581 5253 1010 2652 2019 14477 22083 9072 2751 1011 28661 1010 9928 17612 2015 2000 6201 2055 2010 8690 7279 13098 2012 1996 2398 1997 2010 2155 1011 2164 2010 2684 1010 12918 8227 1012 1045 2572 1037 5470 1997 2035 1017 5889 1012 3581 5253 2003 1006 2000 2026 2568 1007 1037 5365 8813 1010 6201 1037 5722 1010 1998 8227 1037 2200 8403 1010 22979 1998 2104 1011 12315 3883 15839 2464 1999 1996 2877 2535 1012 1045 2036 2031 2498 2114 12409 6119 1010 1998 2096 2025 10190 2007 2307 2846 1010 2016 102\n", + "INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n", + "INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "INFO:tensorflow:label: 0 (id = 0)\n", + "INFO:tensorflow:*** Example ***\n", + "INFO:tensorflow:guid: None\n", + "INFO:tensorflow:tokens: [CLS] i was over ##taken by the emotion . 
un ##for ##get ##table rendering of a wartime story which is unknown to most people . the performances were fault ##less and outstanding . [SEP]\n", + "INFO:tensorflow:input_ids: 101 1045 2001 2058 25310 2011 1996 7603 1012 4895 29278 18150 10880 14259 1997 1037 12498 2466 2029 2003 4242 2000 2087 2111 1012 1996 4616 2020 6346 3238 1998 5151 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "INFO:tensorflow:label: 1 (id = 1)\n", + "INFO:tensorflow:*** Example ***\n", + "INFO:tensorflow:guid: None\n", + "INFO:tensorflow:tokens: [CLS] soldier blue is a movie with pre ##tension ##s : pre ##tension ##s to be some sort of profound statement on man ' s inhuman ##ity to man , on the white man ' s exploitation of and brutality towards indigenous peoples ; a biting , un ##fl ##in ##ching and sar ##don ##ic commentary on the horrors of vietnam . well , sorry , but it fails mis ##era ##bly to be any of those things . what soldier blue actually is is per ##nic ##ious , tri ##te , badly made , dish ##ones ##t rubbish . < br / > < br / > another reviewer here hit the nail on the head in saying that it appears to be a hybrid of [SEP]\n", + "INFO:tensorflow:input_ids: 101 5268 2630 2003 1037 3185 2007 3653 29048 2015 1024 3653 29048 2015 2000 2022 2070 4066 1997 13769 4861 2006 2158 1005 1055 29582 3012 2000 2158 1010 2006 1996 2317 2158 1005 1055 14427 1997 1998 24083 2875 6284 7243 1025 1037 12344 1010 4895 10258 2378 8450 1998 18906 5280 2594 8570 2006 1996 22812 1997 5148 1012 2092 1010 3374 1010 2021 2009 11896 28616 6906 6321 2000 2022 2151 1997 2216 2477 1012 2054 5268 2630 2941 2003 2003 2566 8713 6313 1010 13012 2618 1010 6649 2081 1010 9841 21821 2102 29132 1012 1026 7987 1013 1028 1026 7987 1013 1028 2178 12027 2182 2718 1996 13774 2006 1996 2132 1999 3038 2008 2009 3544 2000 2022 1037 8893 1997 102\n", + "INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n", + "INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "INFO:tensorflow:label: 0 (id = 0)\n", + "INFO:tensorflow:Writing example 0 of 5000\n", + "INFO:tensorflow:*** Example ***\n", + "INFO:tensorflow:guid: None\n", + "INFO:tensorflow:tokens: [CLS] i just watched this today on tv . it was on abc ' s sunday afternoon movie . < br / > < br / > this wasn ' t a very good movie , but for a low budget independent film like this , it was okay . 
there is some suspense in it , but there are so many bad qualities that really bring the movie down . the script is pretty lame , and the plot elements aren ' t very realistic , such as the way a 911 operator would laugh and hang up when someone is reporting a murder . i don ' t know what the writer was thinking when they came up with that idea , but it isn [SEP]\n", + "INFO:tensorflow:input_ids: 101 1045 2074 3427 2023 2651 2006 2694 1012 2009 2001 2006 5925 1005 1055 4465 5027 3185 1012 1026 7987 1013 1028 1026 7987 1013 1028 2023 2347 1005 1056 1037 2200 2204 3185 1010 2021 2005 1037 2659 5166 2981 2143 2066 2023 1010 2009 2001 3100 1012 2045 2003 2070 23873 1999 2009 1010 2021 2045 2024 2061 2116 2919 11647 2008 2428 3288 1996 3185 2091 1012 1996 5896 2003 3492 20342 1010 1998 1996 5436 3787 4995 1005 1056 2200 12689 1010 2107 2004 1996 2126 1037 19989 6872 2052 4756 1998 6865 2039 2043 2619 2003 7316 1037 4028 1012 1045 2123 1005 1056 2113 2054 1996 3213 2001 3241 2043 2027 2234 2039 2007 2008 2801 1010 2021 2009 3475 102\n", + "INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n", + "INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "INFO:tensorflow:label: 0 (id = 0)\n", + "INFO:tensorflow:*** Example ***\n", + "INFO:tensorflow:guid: None\n", + "INFO:tensorflow:tokens: [CLS] from hardly alien sounding lasers , to an elementary school style shuttle crash , \" night ##be ##ast \" is better classified as a far ##cic ##al mix of fake blood and bare chest . the almost pornographic style of the film seems to be a failed attempt to recover from a lack of co ##hesive or effective story . the acting however is not nearly as beast ##ly , many of the young , aspiring , actors ad ##mir ##ably showcase a hidden talent . particularly don lei ##fer ##t and jamie ze ##mare ##l , who shed a well needed sha ##rd of light on this otherwise terrible film . 
night ##be ##ast would have never shown up on set had he known the [SEP]\n", + "INFO:tensorflow:input_ids: 101 2013 6684 7344 9391 23965 1010 2000 2019 4732 2082 2806 10382 5823 1010 1000 2305 4783 14083 1000 2003 2488 6219 2004 1037 2521 19053 2389 4666 1997 8275 2668 1998 6436 3108 1012 1996 2471 26932 2806 1997 1996 2143 3849 2000 2022 1037 3478 3535 2000 8980 2013 1037 3768 1997 2522 21579 2030 4621 2466 1012 1996 3772 2174 2003 2025 3053 2004 6841 2135 1010 2116 1997 1996 2402 1010 22344 1010 5889 4748 14503 8231 13398 1037 5023 5848 1012 3391 2123 26947 7512 2102 1998 6175 27838 24376 2140 1010 2040 8328 1037 2092 2734 21146 4103 1997 2422 2006 2023 4728 6659 2143 1012 2305 4783 14083 2052 2031 2196 3491 2039 2006 2275 2018 2002 2124 1996 102\n", + "INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n", + "INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "INFO:tensorflow:label: 0 (id = 0)\n", + "INFO:tensorflow:*** Example ***\n", + "INFO:tensorflow:guid: None\n", + "INFO:tensorflow:tokens: [CLS] here we have the in ##imi ##table charlie chaplin for ##sa ##king his slap ##stick past to tackle the serious subject of anti - semi ##tism , and into ##ler ##ance in general . he portrays two characters - the sweet , innocent jewish barber - a war veteran , and the ravi ##ng and ruthless dictator , aden ##oid h ##yn ##kel . the jewish ghetto in this country is not safe for long , due to the w ##him ##s of h ##yn ##kel and his armed thugs , who routinely rough up its residents , or leave them alone , dependent upon his mood that day or week . 
the barber is among them , but is befriended by his former commanding officer [SEP]\n", + "INFO:tensorflow:input_ids: 101 2182 2057 2031 1996 1999 27605 10880 4918 23331 2005 3736 6834 2010 14308 21354 2627 2000 11147 1996 3809 3395 1997 3424 1011 4100 17456 1010 1998 2046 3917 6651 1999 2236 1012 2002 17509 2048 3494 1011 1996 4086 1010 7036 3644 13362 1011 1037 2162 8003 1010 1998 1996 16806 3070 1998 18101 21237 1010 16298 9314 1044 6038 11705 1012 1996 3644 17276 1999 2023 2406 2003 2025 3647 2005 2146 1010 2349 2000 1996 1059 14341 2015 1997 1044 6038 11705 1998 2010 4273 24106 1010 2040 19974 5931 2039 2049 3901 1010 2030 2681 2068 2894 1010 7790 2588 2010 6888 2008 2154 2030 2733 1012 1996 13362 2003 2426 2068 1010 2021 2003 23386 2011 2010 2280 7991 2961 102\n", + "INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n", + "INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "INFO:tensorflow:label: 1 (id = 1)\n", + "INFO:tensorflow:*** Example ***\n", + "INFO:tensorflow:guid: None\n", + "INFO:tensorflow:tokens: [CLS] i really hated this movie and it ' s the first movie written by stephen king that i didn ' t finish . i was truly disappointed , it was the worst crap i ' ve ever seen . what were you thinking making three hours out of it ? it may have a quite good story , but actors ? no . suspense ? no . romance ? no . horror ? no . it didn ' t have anything . < br / > < br / > it ' s got this strange , crazy science man with einstein - hair , the classic thing . not real at all . and a man keep getting younger all the time . 
it seems [SEP]\n", + "INFO:tensorflow:input_ids: 101 1045 2428 6283 2023 3185 1998 2009 1005 1055 1996 2034 3185 2517 2011 4459 2332 2008 1045 2134 1005 1056 3926 1012 1045 2001 5621 9364 1010 2009 2001 1996 5409 10231 1045 1005 2310 2412 2464 1012 2054 2020 2017 3241 2437 2093 2847 2041 1997 2009 1029 2009 2089 2031 1037 3243 2204 2466 1010 2021 5889 1029 2053 1012 23873 1029 2053 1012 7472 1029 2053 1012 5469 1029 2053 1012 2009 2134 1005 1056 2031 2505 1012 1026 7987 1013 1028 1026 7987 1013 1028 2009 1005 1055 2288 2023 4326 1010 4689 2671 2158 2007 15313 1011 2606 1010 1996 4438 2518 1012 2025 2613 2012 2035 1012 1998 1037 2158 2562 2893 3920 2035 1996 2051 1012 2009 3849 102\n", + "INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n", + "INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "INFO:tensorflow:label: 0 (id = 0)\n", + "INFO:tensorflow:*** Example ***\n", + "INFO:tensorflow:guid: None\n", + "INFO:tensorflow:tokens: [CLS] story chinese tall story tells the story of righteous monk trip ##ita ##ka , who , along with his guardians monkey , sandy and pigs ##y make their journey west on a quest to recover ancient sutra ##s , finally , they reach the final leg of their journey in sha ##che city but all is not as it seems when the city is attacked by evil tree demons . monkey tries his best to battle them but is overwhelmed , knowing his master is in grave danger , he uses his trust ##y golden staff to thrust trip ##ita ##ka to safety . 
< br / > < br / > the monk ends up being knocked out when he land and when he wakes [SEP]\n", + "INFO:tensorflow:input_ids: 101 2466 2822 4206 2466 4136 1996 2466 1997 19556 8284 4440 6590 2912 1010 2040 1010 2247 2007 2010 14240 10608 1010 7525 1998 14695 2100 2191 2037 4990 2225 2006 1037 8795 2000 8980 3418 26567 2015 1010 2633 1010 2027 3362 1996 2345 4190 1997 2037 4990 1999 21146 5403 2103 2021 2035 2003 2025 2004 2009 3849 2043 1996 2103 2003 4457 2011 4763 3392 7942 1012 10608 5363 2010 2190 2000 2645 2068 2021 2003 13394 1010 4209 2010 3040 2003 1999 6542 5473 1010 2002 3594 2010 3404 2100 3585 3095 2000 7400 4440 6590 2912 2000 3808 1012 1026 7987 1013 1028 1026 7987 1013 1028 1996 8284 4515 2039 2108 6573 2041 2043 2002 2455 1998 2043 2002 17507 102\n", + "INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n", + "INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "INFO:tensorflow:label: 1 (id = 1)\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "ccp5trMwRtmr", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#Creating a model\n", + "\n", + "Now that we've prepared our data, let's focus on building a model. `create_model` does just this below. First, it loads the BERT tf hub module again (this time to extract the computation graph). Next, it creates a single new layer that will be trained to adapt BERT to our sentiment task (i.e. classifying whether a movie review is positive or negative). This strategy of using a mostly trained model is called [fine-tuning](http://wiki.fast.ai/index.php/Fine_tuning)." 
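Before reading `create_model`, it helps to know that the hub module exposes two kinds of output: the pooled output (one vector per example, summarizing the whole sequence) and the token-level sequence output (one vector per token). The sentiment classifier only needs the former. A minimal sketch of the single layer added on top, with assumed sizes (BERT-Base hidden size 768, batch of 32, 2 labels) and example variable names:

import tensorflow as tf

# Stand-in for bert_outputs["pooled_output"]; shape [batch_size, hidden_size].
pooled_output = tf.zeros([32, 768])

output_weights = tf.get_variable(
    "example_output_weights", [2, 768],
    initializer=tf.truncated_normal_initializer(stddev=0.02))
output_bias = tf.get_variable(
    "example_output_bias", [2], initializer=tf.zeros_initializer())

logits = tf.nn.bias_add(
    tf.matmul(pooled_output, output_weights, transpose_b=True), output_bias)
log_probs = tf.nn.log_softmax(logits, axis=-1)  # shape [32, 2]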
+ ] + }, + { + "metadata": { + "id": "6o2a5ZIvRcJq", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "def create_model(is_predicting, input_ids, input_mask, segment_ids, labels,\n", + " num_labels):\n", + " \"\"\"Creates a classification model.\"\"\"\n", + "\n", + " bert_module = hub.Module(\n", + " BERT_MODEL_HUB,\n", + " trainable=True)\n", + " bert_inputs = dict(\n", + " input_ids=input_ids,\n", + " input_mask=input_mask,\n", + " segment_ids=segment_ids)\n", + " bert_outputs = bert_module(\n", + " inputs=bert_inputs,\n", + " signature=\"tokens\",\n", + " as_dict=True)\n", + "\n", + " # Use \"pooled_output\" for classification tasks on an entire sentence.\n", + " # Use \"sequence_outputs\" for token-level output.\n", + " output_layer = bert_outputs[\"pooled_output\"]\n", + "\n", + " hidden_size = output_layer.shape[-1].value\n", + "\n", + " # Create our own layer to tune for politeness data.\n", + " output_weights = tf.get_variable(\n", + " \"output_weights\", [num_labels, hidden_size],\n", + " initializer=tf.truncated_normal_initializer(stddev=0.02))\n", + "\n", + " output_bias = tf.get_variable(\n", + " \"output_bias\", [num_labels], initializer=tf.zeros_initializer())\n", + "\n", + " with tf.variable_scope(\"loss\"):\n", + "\n", + " # Dropout helps prevent overfitting\n", + " output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)\n", + "\n", + " logits = tf.matmul(output_layer, output_weights, transpose_b=True)\n", + " logits = tf.nn.bias_add(logits, output_bias)\n", + " log_probs = tf.nn.log_softmax(logits, axis=-1)\n", + "\n", + " # Convert labels into one-hot encoding\n", + " one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)\n", + "\n", + " predicted_labels = tf.squeeze(tf.argmax(log_probs, axis=-1, output_type=tf.int32))\n", + " # If we're predicting, we want predicted labels and the probabiltiies.\n", + " if is_predicting:\n", + " return (predicted_labels, log_probs)\n", + "\n", + " # If we're train/eval, compute loss between predicted and actual label\n", + " per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)\n", + " loss = tf.reduce_mean(per_example_loss)\n", + " return (loss, predicted_labels, log_probs)\n" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "qpE0ZIDOCQzE", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "Next we'll wrap our model function in a `model_fn_builder` function that adapts our model to work for training, evaluation, and prediction." 
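The builder pattern exists because `tf.estimator.Estimator` expects a single `model_fn(features, labels, mode, params)` that returns an `EstimatorSpec` for whichever mode it is called in. A toy sketch of that contract (not the notebook's builder; the dense layer and the feature name "x" are made up for illustration):

def toy_model_fn(features, labels, mode, params):
    logits = tf.layers.dense(features["x"], units=2)  # stand-in model
    predicted = tf.argmax(logits, axis=-1, output_type=tf.int32)

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions={"labels": predicted})

    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = tf.train.AdamOptimizer(1e-3).minimize(
            loss, global_step=tf.train.get_or_create_global_step())
        return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)

    # EVAL: report metrics alongside the loss.
    metrics = {"accuracy": tf.metrics.accuracy(labels, predicted)}
    return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics)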
+ ] + }, + { + "metadata": { + "id": "FnH-AnOQ9KKW", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "# model_fn_builder actually creates our model function\n", + "# using the passed parameters for num_labels, learning_rate, etc.\n", + "def model_fn_builder(num_labels, learning_rate, num_train_steps,\n", + " num_warmup_steps):\n", + " \"\"\"Returns `model_fn` closure for TPUEstimator.\"\"\"\n", + " def model_fn(features, labels, mode, params): # pylint: disable=unused-argument\n", + " \"\"\"The `model_fn` for TPUEstimator.\"\"\"\n", + "\n", + " input_ids = features[\"input_ids\"]\n", + " input_mask = features[\"input_mask\"]\n", + " segment_ids = features[\"segment_ids\"]\n", + " label_ids = features[\"label_ids\"]\n", + "\n", + " is_predicting = (mode == tf.estimator.ModeKeys.PREDICT)\n", + " \n", + " # TRAIN and EVAL\n", + " if not is_predicting:\n", + "\n", + " (loss, predicted_labels, log_probs) = create_model(\n", + " is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)\n", + "\n", + " train_op = bert.optimization.create_optimizer(\n", + " loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu=False)\n", + "\n", + " # Calculate evaluation metrics. \n", + " def metric_fn(label_ids, predicted_labels):\n", + " accuracy = tf.metrics.accuracy(label_ids, predicted_labels)\n", + " f1_score = tf.contrib.metrics.f1_score(\n", + " label_ids,\n", + " predicted_labels)\n", + " auc = tf.metrics.auc(\n", + " label_ids,\n", + " predicted_labels)\n", + " recall = tf.metrics.recall(\n", + " label_ids,\n", + " predicted_labels)\n", + " precision = tf.metrics.precision(\n", + " label_ids,\n", + " predicted_labels) \n", + " true_pos = tf.metrics.true_positives(\n", + " label_ids,\n", + " predicted_labels)\n", + " true_neg = tf.metrics.true_negatives(\n", + " label_ids,\n", + " predicted_labels) \n", + " false_pos = tf.metrics.false_positives(\n", + " label_ids,\n", + " predicted_labels) \n", + " false_neg = tf.metrics.false_negatives(\n", + " label_ids,\n", + " predicted_labels)\n", + " return {\n", + " \"eval_accuracy\": accuracy,\n", + " \"f1_score\": f1_score,\n", + " \"auc\": auc,\n", + " \"precision\": precision,\n", + " \"recall\": recall,\n", + " \"true_positives\": true_pos,\n", + " \"true_negatives\": true_neg,\n", + " \"false_positives\": false_pos,\n", + " \"false_negatives\": false_neg\n", + " }\n", + "\n", + " eval_metrics = metric_fn(label_ids, predicted_labels)\n", + "\n", + " if mode == tf.estimator.ModeKeys.TRAIN:\n", + " return tf.estimator.EstimatorSpec(mode=mode,\n", + " loss=loss,\n", + " train_op=train_op)\n", + " else:\n", + " return tf.estimator.EstimatorSpec(mode=mode,\n", + " loss=loss,\n", + " eval_metric_ops=eval_metrics)\n", + " else:\n", + " (predicted_labels, log_probs) = create_model(\n", + " is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)\n", + "\n", + " predictions = {\n", + " 'probabilities': log_probs,\n", + " 'labels': predicted_labels\n", + " }\n", + " return tf.estimator.EstimatorSpec(mode, predictions=predictions)\n", + "\n", + " # Return the actual model function in the closure\n", + " return model_fn\n" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "OjwJ4bTeWXD8", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "# Compute train and warmup steps from batch size\n", + "# These hyperparameters are copied from this colab notebook 
(https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb)\n", + "BATCH_SIZE = 32\n", + "LEARNING_RATE = 2e-5\n", + "NUM_TRAIN_EPOCHS = 3.0\n", + "# Warmup is a period of time where hte learning rate \n", + "# is small and gradually increases--usually helps training.\n", + "WARMUP_PROPORTION = 0.1\n", + "# Model configs\n", + "SAVE_CHECKPOINTS_STEPS = 500\n", + "SAVE_SUMMARY_STEPS = 100" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "emHf9GhfWBZ_", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "# Compute # train and warmup steps from batch size\n", + "num_train_steps = int(len(train_features) / BATCH_SIZE * NUM_TRAIN_EPOCHS)\n", + "num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "oEJldMr3WYZa", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "# Specify outpit directory and number of checkpoint steps to save\n", + "run_config = tf.estimator.RunConfig(\n", + " model_dir=OUTPUT_DIR,\n", + " save_summary_steps=SAVE_SUMMARY_STEPS,\n", + " save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "q_WebpS1X97v", + "colab_type": "code", + "outputId": "1648932a-7391-49d3-8af7-52d514e226e8", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 156 + } + }, + "cell_type": "code", + "source": [ + "model_fn = model_fn_builder(\n", + " num_labels=len(label_list),\n", + " learning_rate=LEARNING_RATE,\n", + " num_train_steps=num_train_steps,\n", + " num_warmup_steps=num_warmup_steps)\n", + "\n", + "estimator = tf.estimator.Estimator(\n", + " model_fn=model_fn,\n", + " config=run_config,\n", + " params={\"batch_size\": BATCH_SIZE})\n" + ], + "execution_count": 55, + "outputs": [ + { + "output_type": "stream", + "text": [ + "INFO:tensorflow:Using config: {'_model_dir': 'gs://bert-tfhub/aclImdb_v1', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 500, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true\n", + "graph_options {\n", + " rewrite_options {\n", + " meta_optimizer_iterations: ONE\n", + " }\n", + "}\n", + ", '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': , '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "NOO3RfG1DYLo", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "Next we create an input builder function that takes our training feature set (`train_features`) and produces a generator. This is a pretty standard design pattern for working with Tensorflow [Estimators](https://www.tensorflow.org/guide/estimators)." + ] + }, + { + "metadata": { + "id": "1Pv2bAlOX_-K", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "# Create an input function for training. 
drop_remainder = True for using TPUs.\n", + "train_input_fn = bert.run_classifier.input_fn_builder(\n", + " features=train_features,\n", + " seq_length=MAX_SEQ_LENGTH,\n", + " is_training=True,\n", + " drop_remainder=False)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "t6Nukby2EB6-", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "Now we train our model! For me, using a Colab notebook running on Google's GPUs, my training time was about 14 minutes." + ] + }, + { + "metadata": { + "id": "nucD4gluYJmK", + "colab_type": "code", + "outputId": "5d728e72-4631-42bf-c48d-3f51d4b968ce", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + } + }, + "cell_type": "code", + "source": [ + "print(f'Beginning Training!')\n", + "current_time = datetime.now()\n", + "estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)\n", + "print(\"Training took time \", datetime.now() - current_time)" + ], + "execution_count": 57, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Beginning Training!\n", + "INFO:tensorflow:Skipping training since max_steps has already saved.\n", + "Training took time 0:00:00.759709\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "CmbLTVniARy3", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "Now let's use our test data to see how well our model did:" + ] + }, + { + "metadata": { + "id": "JIhejfpyJ8Bx", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "test_input_fn = run_classifier.input_fn_builder(\n", + " features=test_features,\n", + " seq_length=MAX_SEQ_LENGTH,\n", + " is_training=False,\n", + " drop_remainder=False)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "PPVEXhNjYXC-", + "colab_type": "code", + "outputId": "dd5482cd-c558-465f-c854-ec11a0175316", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 445 + } + }, + "cell_type": "code", + "source": [ + "estimator.evaluate(input_fn=test_input_fn, steps=None)" + ], + "execution_count": 59, + "outputs": [ + { + "output_type": "stream", + "text": [ + "INFO:tensorflow:Calling model_fn.\n", + "INFO:tensorflow:Saver not created because there are no variables in the graph to restore\n" + ], + "name": "stdout" + }, + { + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gradients_impl.py:110: UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " \"Converting sparse IndexedSlices to a dense Tensor of unknown shape. 
\"\n" + ], + "name": "stderr" + }, + { + "output_type": "stream", + "text": [ + "INFO:tensorflow:Done calling model_fn.\n", + "INFO:tensorflow:Starting evaluation at 2019-02-12T21:04:20Z\n", + "INFO:tensorflow:Graph was finalized.\n", + "INFO:tensorflow:Restoring parameters from gs://bert-tfhub/aclImdb_v1/model.ckpt-468\n", + "INFO:tensorflow:Running local_init_op.\n", + "INFO:tensorflow:Done running local_init_op.\n", + "INFO:tensorflow:Finished evaluation at 2019-02-12-21:06:05\n", + "INFO:tensorflow:Saving dict for global step 468: auc = 0.86659324, eval_accuracy = 0.8664, f1_score = 0.8659711, false_negatives = 375.0, false_positives = 293.0, global_step = 468, loss = 0.51870537, precision = 0.880457, recall = 0.8519542, true_negatives = 2174.0, true_positives = 2158.0\n", + "INFO:tensorflow:Saving 'checkpoint_path' summary for global step 468: gs://bert-tfhub/aclImdb_v1/model.ckpt-468\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{'auc': 0.86659324,\n", + " 'eval_accuracy': 0.8664,\n", + " 'f1_score': 0.8659711,\n", + " 'false_negatives': 375.0,\n", + " 'false_positives': 293.0,\n", + " 'global_step': 468,\n", + " 'loss': 0.51870537,\n", + " 'precision': 0.880457,\n", + " 'recall': 0.8519542,\n", + " 'true_negatives': 2174.0,\n", + " 'true_positives': 2158.0}" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 59 + } + ] + }, + { + "metadata": { + "id": "ueKsULteiz1B", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "Now let's write code to make predictions on new sentences:" + ] + }, + { + "metadata": { + "id": "OsrbTD2EJTVl", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "def getPrediction(in_sentences):\n", + " labels = [\"Negative\", \"Positive\"]\n", + " input_examples = [run_classifier.InputExample(guid=\"\", text_a = x, text_b = None, label = 0) for x in in_sentences] # here, \"\" is just a dummy label\n", + " input_features = run_classifier.convert_examples_to_features(input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)\n", + " predict_input_fn = run_classifier.input_fn_builder(features=input_features, seq_length=MAX_SEQ_LENGTH, is_training=False, drop_remainder=False)\n", + " predictions = estimator.predict(predict_input_fn)\n", + " return [(sentence, prediction['probabilities'], labels[prediction['labels']]) for sentence, prediction in zip(in_sentences, predictions)]" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "-thbodgih_VJ", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "pred_sentences = [\n", + " \"That movie was absolutely awful\",\n", + " \"The acting was a bit lacking\",\n", + " \"The film was creative and surprising\",\n", + " \"Absolutely fantastic!\"\n", + "]" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "QrZmvZySKQTm", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 649 + }, + "outputId": "3891fafb-a460-4eb8-fa6c-335a5bbc10e5" + }, + "cell_type": "code", + "source": [ + "predictions = getPrediction(pred_sentences)" + ], + "execution_count": 72, + "outputs": [ + { + "output_type": "stream", + "text": [ + "INFO:tensorflow:Writing example 0 of 4\n", + "INFO:tensorflow:*** Example ***\n", + "INFO:tensorflow:guid: \n", + "INFO:tensorflow:tokens: [CLS] that movie was absolutely awful [SEP]\n", + "INFO:tensorflow:input_ids: 101 2008 3185 2001 7078 9643 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "INFO:tensorflow:label: 0 (id = 0)\n", + "INFO:tensorflow:*** Example ***\n", + "INFO:tensorflow:guid: \n", + "INFO:tensorflow:tokens: [CLS] the acting was a bit lacking [SEP]\n", + "INFO:tensorflow:input_ids: 101 1996 3772 2001 1037 2978 11158 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "INFO:tensorflow:label: 0 (id = 0)\n", + "INFO:tensorflow:*** Example ***\n", + "INFO:tensorflow:guid: \n", + "INFO:tensorflow:tokens: [CLS] the film was creative and surprising [SEP]\n", + "INFO:tensorflow:input_ids: 101 1996 2143 2001 5541 1998 11341 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "INFO:tensorflow:label: 0 (id = 0)\n", + "INFO:tensorflow:*** Example ***\n", + "INFO:tensorflow:guid: \n", + "INFO:tensorflow:tokens: [CLS] absolutely fantastic ! 
[SEP]\n", + "INFO:tensorflow:input_ids: 101 7078 10392 999 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "INFO:tensorflow:input_mask: 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "INFO:tensorflow:label: 0 (id = 0)\n", + "INFO:tensorflow:Calling model_fn.\n", + "INFO:tensorflow:Saver not created because there are no variables in the graph to restore\n", + "INFO:tensorflow:Done calling model_fn.\n", + "INFO:tensorflow:Graph was finalized.\n", + "INFO:tensorflow:Restoring parameters from gs://bert-tfhub/aclImdb_v1/model.ckpt-468\n", + "INFO:tensorflow:Running local_init_op.\n", + "INFO:tensorflow:Done running local_init_op.\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "MXkRiEBUqN3n", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "Voila! We have a sentiment classifier!" + ] + }, + { + "metadata": { + "id": "ERkTE8-7oQLZ", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 221 + }, + "outputId": "26c33224-dc2c-4b3d-f7b4-ac3ef0a58b27" + }, + "cell_type": "code", + "source": [ + "predictions" + ], + "execution_count": 73, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[('That movie was absolutely awful',\n", + " array([-4.9142293e-03, -5.3180690e+00], dtype=float32),\n", + " 'Negative'),\n", + " ('The acting was a bit lacking',\n", + " array([-0.03325794, -3.4200459 ], dtype=float32),\n", + " 'Negative'),\n", + " ('The film was creative and surprising',\n", + " array([-5.3589125e+00, -4.7171740e-03], dtype=float32),\n", + " 'Positive'),\n", + " ('Absolutely fantastic!',\n", + " array([-5.0434084 , -0.00647258], dtype=float32),\n", + " 'Positive')]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 73 + } + ] + } + ] +} \ No newline at end of file diff --git a/run_classifier.py b/run_classifier.py new file mode 100644 index 0000000..1ef4f06 --- /dev/null +++ b/run_classifier.py @@ -0,0 +1,1056 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
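+#
+# A minimal sketch of a typical invocation of this runner for the custom "ep"
+# task registered below (paths and environment variables are placeholders,
+# not files guaranteed to exist in this repo):
+#
+#   python run_classifier.py \
+#     --task_name=ep \
+#     --do_train=true --do_eval=true \
+#     --data_dir=$DATA_DIR \
+#     --vocab_file=$BERT_DIR/vocab.txt \
+#     --bert_config_file=$BERT_DIR/bert_config.json \
+#     --init_checkpoint=$BERT_DIR/bert_model.ckpt \
+#     --max_seq_length=128 \
+#     --train_batch_size=32 \
+#     --learning_rate=5e-5 \
+#     --num_train_epochs=3.0 \
+#     --output_dir=$OUTPUT_DIR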
+"""BERT finetuning runner.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import csv +import os +import modeling +import optimization +import tokenization +import tensorflow as tf + +flags = tf.flags + +FLAGS = flags.FLAGS + +## Required parameters +flags.DEFINE_string( + "data_dir", None, + "The input data dir. Should contain the .tsv files (or other data files) " + "for the task.") + +flags.DEFINE_string( + "bert_config_file", None, + "The config json file corresponding to the pre-trained BERT model. " + "This specifies the model architecture.") + +flags.DEFINE_string("task_name", None, "The name of the task to train.") + +flags.DEFINE_string("vocab_file", None, + "The vocabulary file that the BERT model was trained on.") + +flags.DEFINE_string( + "output_dir", None, + "The output directory where the model checkpoints will be written.") + +## Other parameters + +flags.DEFINE_string( + "init_checkpoint", None, + "Initial checkpoint (usually from a pre-trained BERT model).") + +flags.DEFINE_bool( + "do_lower_case", True, + "Whether to lower case the input text. Should be True for uncased " + "models and False for cased models.") + +flags.DEFINE_integer( + "max_seq_length", 128, + "The maximum total input sequence length after WordPiece tokenization. " + "Sequences longer than this will be truncated, and sequences shorter " + "than this will be padded.") + +flags.DEFINE_bool("do_train", False, "Whether to run training.") + +flags.DEFINE_bool("do_eval", False, "Whether to run eval on the dev set.") + +flags.DEFINE_bool( + "do_predict", False, + "Whether to run the model in inference mode on the test set.") + +flags.DEFINE_integer("train_batch_size", 32, "Total batch size for training.") + +flags.DEFINE_integer("eval_batch_size", 8, "Total batch size for eval.") + +flags.DEFINE_integer("predict_batch_size", 8, "Total batch size for predict.") + +flags.DEFINE_float("learning_rate", 5e-5, "The initial learning rate for Adam.") + +flags.DEFINE_float("num_train_epochs", 3.0, + "Total number of training epochs to perform.") + +flags.DEFINE_float( + "warmup_proportion", 0.1, + "Proportion of training to perform linear learning rate warmup for. " + "E.g., 0.1 = 10% of training.") + +flags.DEFINE_integer("save_checkpoints_steps", 1000, + "How often to save the model checkpoint.") + +flags.DEFINE_integer("iterations_per_loop", 1000, + "How many steps to make in each estimator call.") + +flags.DEFINE_bool("use_tpu", False, "Whether to use TPU or GPU/CPU.") + +tf.flags.DEFINE_string( + "tpu_name", None, + "The Cloud TPU to use for training. This should be either the name " + "used when creating the Cloud TPU, or a grpc://ip.address.of.tpu:8470 " + "url.") + +tf.flags.DEFINE_string( + "tpu_zone", None, + "[Optional] GCE zone where the Cloud TPU is located in. If not " + "specified, we will attempt to automatically detect the GCE project from " + "metadata.") + +tf.flags.DEFINE_string( + "gcp_project", None, + "[Optional] Project name for the Cloud TPU-enabled project. If not " + "specified, we will attempt to automatically detect the GCE project from " + "metadata.") + +tf.flags.DEFINE_string("master", None, "[Optional] TensorFlow master URL.") + +flags.DEFINE_integer( + "num_tpu_cores", 8, + "Only used if `use_tpu` is True. 
Total number of TPU cores to use.") + + +class InputExample(object): + """A single training/test example for simple sequence classification.""" + + def __init__(self, guid, text_a, text_b=None, label=None): + """Constructs a InputExample. + + Args: + guid: Unique id for the example. + text_a: string. The untokenized text of the first sequence. For single + sequence tasks, only this sequence must be specified. + text_b: (Optional) string. The untokenized text of the second sequence. + Only must be specified for sequence pair tasks. + label: (Optional) string. The label of the example. This should be + specified for train and dev examples, but not for test examples. + """ + self.guid = guid + self.text_a = text_a + self.text_b = text_b + self.label = label + + +class PaddingInputExample(object): + """Fake example so the num input examples is a multiple of the batch size. + + When running eval/predict on the TPU, we need to pad the number of examples + to be a multiple of the batch size, because the TPU requires a fixed batch + size. The alternative is to drop the last batch, which is bad because it means + the entire output data won't be generated. + + We use this class instead of `None` because treating `None` as padding + battches could cause silent errors. + """ + + +class InputFeatures(object): + """A single set of features of data.""" + + def __init__(self, + input_ids, + input_mask, + segment_ids, + label_id, + is_real_example=True): + self.input_ids = input_ids + self.input_mask = input_mask + self.segment_ids = segment_ids + self.label_id = label_id + self.is_real_example = is_real_example + + +class DataProcessor(object): + """Base class for data converters for sequence classification data sets.""" + + def get_train_examples(self, data_dir): + """Gets a collection of `InputExample`s for the train set.""" + raise NotImplementedError() + + def get_dev_examples(self, data_dir): + """Gets a collection of `InputExample`s for the dev set.""" + raise NotImplementedError() + + def get_test_examples(self, data_dir): + """Gets a collection of `InputExample`s for prediction.""" + raise NotImplementedError() + + def get_labels(self): + """Gets the list of labels for this data set.""" + raise NotImplementedError() + + @classmethod + def _read_tsv(cls, input_file, quotechar=None): + """Reads a tab separated value file.""" + with tf.gfile.Open(input_file, "r") as f: + reader = csv.reader(f, delimiter="\t", quotechar=quotechar) + lines = [] + for line in reader: + lines.append(line) + return lines + + +class XnliProcessor(DataProcessor): + """Processor for the XNLI data set.""" + + def __init__(self): + self.language = "zh" + + def get_train_examples(self, data_dir): + """See base class.""" + lines = self._read_tsv( + os.path.join(data_dir, "multinli", + "multinli.train.%s.tsv" % self.language)) + examples = [] + for (i, line) in enumerate(lines): + if i == 0: + continue + guid = "train-%d" % (i) + text_a = tokenization.convert_to_unicode(line[0]) + text_b = tokenization.convert_to_unicode(line[1]) + label = tokenization.convert_to_unicode(line[2]) + if label == tokenization.convert_to_unicode("contradictory"): + label = tokenization.convert_to_unicode("contradiction") + examples.append( + InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) + return examples + + def get_dev_examples(self, data_dir): + """See base class.""" + lines = self._read_tsv(os.path.join(data_dir, "xnli.dev.tsv")) + examples = [] + for (i, line) in enumerate(lines): + if i == 0: + continue + guid = "dev-%d" % 
(i) + language = tokenization.convert_to_unicode(line[0]) + if language != tokenization.convert_to_unicode(self.language): + continue + text_a = tokenization.convert_to_unicode(line[6]) + text_b = tokenization.convert_to_unicode(line[7]) + label = tokenization.convert_to_unicode(line[1]) + examples.append( + InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) + return examples + + def get_labels(self): + """See base class.""" + return ["contradiction", "entailment", "neutral"] + + +class MnliProcessor(DataProcessor): + """Processor for the MultiNLI data set (GLUE version).""" + + def get_train_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_tsv(os.path.join(data_dir, "train.tsv")), "train") + + def get_dev_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_tsv(os.path.join(data_dir, "dev_matched.tsv")), + "dev_matched") + + def get_test_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_tsv(os.path.join(data_dir, "test_matched.tsv")), "test") + + def get_labels(self): + """See base class.""" + return ["contradiction", "entailment", "neutral"] + + def _create_examples(self, lines, set_type): + """Creates examples for the training and dev sets.""" + examples = [] + for (i, line) in enumerate(lines): + if i == 0: + continue + guid = "%s-%s" % (set_type, tokenization.convert_to_unicode(line[0])) + text_a = tokenization.convert_to_unicode(line[8]) + text_b = tokenization.convert_to_unicode(line[9]) + if set_type == "test": + label = "contradiction" + else: + label = tokenization.convert_to_unicode(line[-1]) + examples.append( + InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) + return examples + + +class MrpcProcessor(DataProcessor): + """Processor for the MRPC data set (GLUE version).""" + + def get_train_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_tsv(os.path.join(data_dir, "train.tsv")), "train") + + def get_dev_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev") + + def get_test_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_tsv(os.path.join(data_dir, "test.tsv")), "test") + + def get_labels(self): + """See base class.""" + return ["0", "1"] + + def _create_examples(self, lines, set_type): + """Creates examples for the training and dev sets.""" + examples = [] + for (i, line) in enumerate(lines): + if i == 0: + continue + guid = "%s-%s" % (set_type, i) + text_a = tokenization.convert_to_unicode(line[3]) + text_b = tokenization.convert_to_unicode(line[4]) + if set_type == "test": + label = "0" + else: + label = tokenization.convert_to_unicode(line[0]) + examples.append( + InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) + return examples + + +class ColaProcessor(DataProcessor): + """Processor for the CoLA data set (GLUE version).""" + + def get_train_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_tsv(os.path.join(data_dir, "train.tsv")), "train") + + def get_dev_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev") + + def get_test_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_tsv(os.path.join(data_dir, "test.tsv")), "test") + + def 
get_labels(self):
+    """See base class."""
+    return ["0", "1"]
+
+  def _create_examples(self, lines, set_type):
+    """Creates examples for the training and dev sets."""
+    examples = []
+    for (i, line) in enumerate(lines):
+      # Only the test set has a header
+      if set_type == "test" and i == 0:
+        continue
+      guid = "%s-%s" % (set_type, i)
+      if set_type == "test":
+        text_a = tokenization.convert_to_unicode(line[1])
+        label = "0"
+      else:
+        text_a = tokenization.convert_to_unicode(line[3])
+        label = tokenization.convert_to_unicode(line[1])
+      examples.append(
+          InputExample(guid=guid, text_a=text_a, text_b=None, label=label))
+    return examples
+
+
+import dealing_dataset
+
+
+class EPProcessor(DataProcessor):
+  """Processor for the Emotion data set."""
+
+  def get_train_examples(self, data_dir):
+    """Defines the training-set data. data_dir is passed in as a parameter; just supply your file name here."""
+    return self._create_examples("amki_train")
+
+  def get_dev_examples(self, data_dir):
+    """Defines the dev-set data. data_dir is passed in as a parameter and is used during model training; just supply your file name here."""
+    return self._create_examples("amki_dev")
+
+  def get_test_examples(self, data_dir):
+    """Defines the test-set data, used for prediction. This function is not used during training; name the prediction dataset here."""
+    return self._create_examples("amki_test")
+
+  def get_labels(self):
+    """Lists all of the classification labels. This task has 3 labels; the values must match what is stored in the csv."""
+    return [0, 1, 2]
+
+  def _create_examples(self, data_table):
+    """Processes the data, splitting each example into the fields of InputExample:
+    text_a is the text of the first sentence.
+    text_b is the text of the second sentence; since this task is single-sentence classification, None is passed here.
+    guid is a pair: the first element is the dataset split (train/dev/test), the second is the example index.
+    label is the sentence class.
+    """
+    examples = []
+    for column in dealing_dataset.create_dataset_ep(data_table):
+      # Add the example
+      examples.append(
+          InputExample(guid=column[0], text_a=column[2], text_b=None, label=column[1]))
+
+    return examples
+
+
+class EPBPTProcessor(DataProcessor):
+  """Processor for the Emotion data set."""
+
+  def get_train_examples(self, data_dir):
+    """Defines the training-set data. data_dir is passed in as a parameter; just supply your file name here."""
+    return self._create_examples("amki_train")
+
+  def get_dev_examples(self, data_dir):
+    """Defines the dev-set data. data_dir is passed in as a parameter and is used during model training; just supply your file name here."""
+    return self._create_examples("amki_dev")
+
+  def get_test_examples(self, data_dir):
+    """Defines the test-set data, used for prediction. This function is not used during training; name the prediction dataset here."""
+    return self._create_examples("amki_test")
+
+  def get_labels(self):
+    """Lists all of the classification labels. This task has 3 labels; the values must match what is stored in the csv."""
+    return [0, 1, 2]
+
+  def _create_examples(self, data_table):
+    """Processes the data, splitting each example into the fields of InputExample:
+    text_a is the text of the first sentence.
+    text_b is the text of the second sentence; since this task is single-sentence classification, None is passed here.
+    guid is a pair: the first element is the dataset split (train/dev/test), the second is the example index.
+    label is the sentence class.
+    """
+    examples = []
+    for column in dealing_dataset.create_dataset_pdt():
+      # Add the example
+      examples.append(
+          InputExample(guid=column[0], text_a=column[2], text_b=None, label=column[1]))
+
+    return examples
+
+
+def convert_single_example(ex_index, example, label_list, max_seq_length,
+                           tokenizer):
+  """Converts a single `InputExample` into a single `InputFeatures`."""
+
+  if isinstance(example, PaddingInputExample):
+    return InputFeatures(
+        input_ids=[0] * max_seq_length,
+        input_mask=[0] * max_seq_length,
+        segment_ids=[0] * max_seq_length,
+        label_id=0,
+        is_real_example=False)
+
+  label_map = {}
+  for (i, label) in enumerate(label_list):
+    label_map[label] = i
+
+  tokens_a = tokenizer.tokenize(example.text_a)
+  tokens_b = None
+  if example.text_b:
+    tokens_b = tokenizer.tokenize(example.text_b)
+
+  if tokens_b:
+    # Modifies `tokens_a` and `tokens_b` in place so that the total
+    # length is less than the specified length.
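+    # (For illustration: with max_seq_length=10 the token budget for the pair
+    # is 10 - 3 = 7, and _truncate_seq_pair below pops one token at a time
+    # from the longer sequence until the pair fits.)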
+ # Account for [CLS], [SEP], [SEP] with "- 3" + _truncate_seq_pair(tokens_a, tokens_b, max_seq_length - 3) + else: + # Account for [CLS] and [SEP] with "- 2" + if len(tokens_a) > max_seq_length - 2: + tokens_a = tokens_a[0:(max_seq_length - 2)] + + # The convention in BERT is: + # (a) For sequence pairs: + # tokens: [CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP] + # type_ids: 0 0 0 0 0 0 0 0 1 1 1 1 1 1 + # (b) For single sequences: + # tokens: [CLS] the dog is hairy . [SEP] + # type_ids: 0 0 0 0 0 0 0 + # + # Where "type_ids" are used to indicate whether this is the first + # sequence or the second sequence. The embedding vectors for `type=0` and + # `type=1` were learned during pre-training and are added to the wordpiece + # embedding vector (and position vector). This is not *strictly* necessary + # since the [SEP] token unambiguously separates the sequences, but it makes + # it easier for the model to learn the concept of sequences. + # + # For classification tasks, the first vector (corresponding to [CLS]) is + # used as the "sentence vector". Note that this only makes sense because + # the entire model is fine-tuned. + tokens = [] + segment_ids = [] + tokens.append("[CLS]") + segment_ids.append(0) + for token in tokens_a: + tokens.append(token) + segment_ids.append(0) + tokens.append("[SEP]") + segment_ids.append(0) + + if tokens_b: + for token in tokens_b: + tokens.append(token) + segment_ids.append(1) + tokens.append("[SEP]") + segment_ids.append(1) + + input_ids = tokenizer.convert_tokens_to_ids(tokens) + + # The mask has 1 for real tokens and 0 for padding tokens. Only real + # tokens are attended to. + input_mask = [1] * len(input_ids) + + # Zero-pad up to the sequence length. + while len(input_ids) < max_seq_length: + input_ids.append(0) + input_mask.append(0) + segment_ids.append(0) + + assert len(input_ids) == max_seq_length + assert len(input_mask) == max_seq_length + assert len(segment_ids) == max_seq_length + + label_id = label_map[example.label] + if ex_index < 5: + tf.logging.info("*** Example ***") + tf.logging.info("guid: %s" % (example.guid)) + tf.logging.info("tokens: %s" % " ".join( + [tokenization.printable_text(x) for x in tokens])) + tf.logging.info("input_ids: %s" % " ".join([str(x) for x in input_ids])) + tf.logging.info("input_mask: %s" % " ".join([str(x) for x in input_mask])) + tf.logging.info("segment_ids: %s" % " ".join([str(x) for x in segment_ids])) + tf.logging.info("label: %s (id = %d)" % (example.label, label_id)) + + feature = InputFeatures( + input_ids=input_ids, + input_mask=input_mask, + segment_ids=segment_ids, + label_id=label_id, + is_real_example=True) + return feature + + +def file_based_convert_examples_to_features( + examples, label_list, max_seq_length, tokenizer, output_file): + """Convert a set of `InputExample`s to a TFRecord file.""" + + writer = tf.python_io.TFRecordWriter(output_file) + + for (ex_index, example) in enumerate(examples): + if ex_index % 10000 == 0: + tf.logging.info("Writing example %d of %d" % (ex_index, len(examples))) + + feature = convert_single_example(ex_index, example, label_list, + max_seq_length, tokenizer) + + def create_int_feature(values): + f = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values))) + return f + + features = collections.OrderedDict() + features["input_ids"] = create_int_feature(feature.input_ids) + features["input_mask"] = create_int_feature(feature.input_mask) + features["segment_ids"] = create_int_feature(feature.segment_ids) + features["label_ids"] = 
create_int_feature([feature.label_id]) + features["is_real_example"] = create_int_feature( + [int(feature.is_real_example)]) + + tf_example = tf.train.Example(features=tf.train.Features(feature=features)) + writer.write(tf_example.SerializeToString()) + writer.close() + + +def file_based_input_fn_builder(input_file, seq_length, is_training, + drop_remainder): + """Creates an `input_fn` closure to be passed to TPUEstimator.""" + + name_to_features = { + "input_ids": tf.FixedLenFeature([seq_length], tf.int64), + "input_mask": tf.FixedLenFeature([seq_length], tf.int64), + "segment_ids": tf.FixedLenFeature([seq_length], tf.int64), + "label_ids": tf.FixedLenFeature([], tf.int64), + "is_real_example": tf.FixedLenFeature([], tf.int64), + } + + def _decode_record(record, name_to_features): + """Decodes a record to a TensorFlow example.""" + example = tf.parse_single_example(record, name_to_features) + + # tf.Example only supports tf.int64, but the TPU only supports tf.int32. + # So cast all int64 to int32. + for name in list(example.keys()): + t = example[name] + if t.dtype == tf.int64: + t = tf.to_int32(t) + example[name] = t + + return example + + def input_fn(params): + """The actual input function.""" + batch_size = params["batch_size"] + + # For training, we want a lot of parallel reading and shuffling. + # For eval, we want no shuffling and parallel reading doesn't matter. + d = tf.data.TFRecordDataset(input_file) + if is_training: + d = d.repeat() + d = d.shuffle(buffer_size=100) + + d = d.apply( + tf.contrib.data.map_and_batch( + lambda record: _decode_record(record, name_to_features), + batch_size=batch_size, + drop_remainder=drop_remainder)) + + return d + + return input_fn + + +def _truncate_seq_pair(tokens_a, tokens_b, max_length): + """Truncates a sequence pair in place to the maximum length.""" + + # This is a simple heuristic which will always truncate the longer sequence + # one token at a time. This makes more sense than truncating an equal percent + # of tokens from each, since if one sequence is very short then each token + # that's truncated likely contains more information than a longer sequence. + while True: + total_length = len(tokens_a) + len(tokens_b) + if total_length <= max_length: + break + if len(tokens_a) > len(tokens_b): + tokens_a.pop() + else: + tokens_b.pop() + + +def create_model(bert_config, is_training, input_ids, input_mask, segment_ids, + labels, num_labels, use_one_hot_embeddings): + """Creates a classification model.""" + model = modeling.BertModel( + config=bert_config, + is_training=is_training, + input_ids=input_ids, + input_mask=input_mask, + token_type_ids=segment_ids, + use_one_hot_embeddings=use_one_hot_embeddings) + + # In the demo, we are doing a simple classification task on the entire + # segment. + # + # If you want to use the token-level output, use model.get_sequence_output() + # instead. 
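+  # get_pooled_output() is the final-layer [CLS] hidden state run through the
+  # dense+tanh "pooler", so `output_layer` below has shape
+  # [batch_size, hidden_size].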
+ output_layer = model.get_pooled_output() + + hidden_size = output_layer.shape[-1].value + + output_weights = tf.get_variable( + "output_weights", [num_labels, hidden_size], + initializer=tf.truncated_normal_initializer(stddev=0.02)) + + output_bias = tf.get_variable( + "output_bias", [num_labels], initializer=tf.zeros_initializer()) + + with tf.variable_scope("loss"): + if is_training: + # I.e., 0.1 dropout + output_layer = tf.nn.dropout(output_layer, keep_prob=0.9) + + logits = tf.matmul(output_layer, output_weights, transpose_b=True) + logits = tf.nn.bias_add(logits, output_bias) + probabilities = tf.nn.softmax(logits, axis=-1) + log_probs = tf.nn.log_softmax(logits, axis=-1) + + one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32) + + per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1) + loss = tf.reduce_mean(per_example_loss) + + return (loss, per_example_loss, logits, probabilities) + + +def model_fn_builder(bert_config, num_labels, init_checkpoint, learning_rate, + num_train_steps, num_warmup_steps, use_tpu, + use_one_hot_embeddings): + """Returns `model_fn` closure for TPUEstimator.""" + + def model_fn(features, labels, mode, params): # pylint: disable=unused-argument + """The `model_fn` for TPUEstimator.""" + + tf.logging.info("*** Features ***") + for name in sorted(features.keys()): + tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape)) + + input_ids = features["input_ids"] + input_mask = features["input_mask"] + segment_ids = features["segment_ids"] + label_ids = features["label_ids"] + is_real_example = None + if "is_real_example" in features: + is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32) + else: + is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32) + + is_training = (mode == tf.estimator.ModeKeys.TRAIN) + + (total_loss, per_example_loss, logits, probabilities) = create_model( + bert_config, is_training, input_ids, input_mask, segment_ids, label_ids, + num_labels, use_one_hot_embeddings) + + tvars = tf.trainable_variables() + initialized_variable_names = {} + scaffold_fn = None + if init_checkpoint: + (assignment_map, initialized_variable_names + ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint) + if use_tpu: + + def tpu_scaffold(): + tf.train.init_from_checkpoint(init_checkpoint, assignment_map) + return tf.train.Scaffold() + + scaffold_fn = tpu_scaffold + else: + tf.train.init_from_checkpoint(init_checkpoint, assignment_map) + + tf.logging.info("**** Trainable Variables ****") + for var in tvars: + init_string = "" + if var.name in initialized_variable_names: + init_string = ", *INIT_FROM_CKPT*" + tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape, + init_string) + + output_spec = None + if mode == tf.estimator.ModeKeys.TRAIN: + + train_op = optimization.create_optimizer( + total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu) + + output_spec = tf.contrib.tpu.TPUEstimatorSpec( + mode=mode, + loss=total_loss, + train_op=train_op, + scaffold_fn=scaffold_fn) + elif mode == tf.estimator.ModeKeys.EVAL: + + def metric_fn(per_example_loss, label_ids, logits, is_real_example): + predictions = tf.argmax(logits, axis=-1, output_type=tf.int32) + accuracy = tf.metrics.accuracy( + labels=label_ids, predictions=predictions, weights=is_real_example) + loss = tf.metrics.mean(values=per_example_loss, weights=is_real_example) + return { + "eval_accuracy": accuracy, + "eval_loss": loss, + } + + eval_metrics = (metric_fn, + [per_example_loss, 
label_ids, logits, is_real_example]) + output_spec = tf.contrib.tpu.TPUEstimatorSpec( + mode=mode, + loss=total_loss, + eval_metrics=eval_metrics, + scaffold_fn=scaffold_fn) + else: + output_spec = tf.contrib.tpu.TPUEstimatorSpec( + mode=mode, + predictions={"probabilities": probabilities}, + scaffold_fn=scaffold_fn) + return output_spec + + return model_fn + + +# This function is not used by this file but is still used by the Colab and +# people who depend on it. +def input_fn_builder(features, seq_length, is_training, drop_remainder): + """Creates an `input_fn` closure to be passed to TPUEstimator.""" + + all_input_ids = [] + all_input_mask = [] + all_segment_ids = [] + all_label_ids = [] + + for feature in features: + all_input_ids.append(feature.input_ids) + all_input_mask.append(feature.input_mask) + all_segment_ids.append(feature.segment_ids) + all_label_ids.append(feature.label_id) + + def input_fn(params): + """The actual input function.""" + batch_size = params["batch_size"] + + num_examples = len(features) + + # This is for demo purposes and does NOT scale to large data sets. We do + # not use Dataset.from_generator() because that uses tf.py_func which is + # not TPU compatible. The right way to load data is with TFRecordReader. + d = tf.data.Dataset.from_tensor_slices({ + "input_ids": + tf.constant( + all_input_ids, shape=[num_examples, seq_length], + dtype=tf.int32), + "input_mask": + tf.constant( + all_input_mask, + shape=[num_examples, seq_length], + dtype=tf.int32), + "segment_ids": + tf.constant( + all_segment_ids, + shape=[num_examples, seq_length], + dtype=tf.int32), + "label_ids": + tf.constant(all_label_ids, shape=[num_examples], dtype=tf.int32), + }) + + if is_training: + d = d.repeat() + d = d.shuffle(buffer_size=100) + + d = d.batch(batch_size=batch_size, drop_remainder=drop_remainder) + return d + + return input_fn + + +# This function is not used by this file but is still used by the Colab and +# people who depend on it. 
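+# (In the Colab above, the typical flow is:
+#    features = convert_examples_to_features(examples, label_list, MAX_SEQ_LENGTH, tokenizer)
+#    input_fn = input_fn_builder(features, MAX_SEQ_LENGTH, is_training=False, drop_remainder=False)
+#    predictions = estimator.predict(input_fn)
+#  as in the notebook's getPrediction helper.)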
+def convert_examples_to_features(examples, label_list, max_seq_length, + tokenizer): + """Convert a set of `InputExample`s to a list of `InputFeatures`.""" + + features = [] + for (ex_index, example) in enumerate(examples): + if ex_index % 10000 == 0: + tf.logging.info("Writing example %d of %d" % (ex_index, len(examples))) + + feature = convert_single_example(ex_index, example, label_list, + max_seq_length, tokenizer) + + features.append(feature) + return features + + +def main(_): + tf.logging.set_verbosity(tf.logging.INFO) + + processors = { + "cola": ColaProcessor, + "mnli": MnliProcessor, + "mrpc": MrpcProcessor, + "xnli": XnliProcessor, + "ep": EPProcessor, + "eppdt": EPBPTProcessor, + } + + tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case, + FLAGS.init_checkpoint) + + if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict: + raise ValueError( + "At least one of `do_train`, `do_eval` or `do_predict' must be True.") + + bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file) + + if FLAGS.max_seq_length > bert_config.max_position_embeddings: + raise ValueError( + "Cannot use sequence length %d because the BERT model " + "was only trained up to sequence length %d" % + (FLAGS.max_seq_length, bert_config.max_position_embeddings)) + + tf.gfile.MakeDirs(FLAGS.output_dir) + + task_name = FLAGS.task_name.lower() + + if task_name not in processors: + raise ValueError("Task not found: %s" % (task_name)) + + processor = processors[task_name]() + + label_list = processor.get_labels() + + tokenizer = tokenization.FullTokenizer( + vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case) + + tpu_cluster_resolver = None + if FLAGS.use_tpu and FLAGS.tpu_name: + tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver( + FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project) + + is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2 + run_config = tf.contrib.tpu.RunConfig( + cluster=tpu_cluster_resolver, + master=FLAGS.master, + model_dir=FLAGS.output_dir, + save_checkpoints_steps=FLAGS.save_checkpoints_steps, + tpu_config=tf.contrib.tpu.TPUConfig( + iterations_per_loop=FLAGS.iterations_per_loop, + num_shards=FLAGS.num_tpu_cores, + per_host_input_for_training=is_per_host)) + + train_examples = None + num_train_steps = None + num_warmup_steps = None + if FLAGS.do_train: + train_examples = processor.get_train_examples(FLAGS.data_dir) + num_train_steps = int( + len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs) + num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion) + + model_fn = model_fn_builder( + bert_config=bert_config, + num_labels=len(label_list), + init_checkpoint=FLAGS.init_checkpoint, + learning_rate=FLAGS.learning_rate, + num_train_steps=num_train_steps, + num_warmup_steps=num_warmup_steps, + use_tpu=FLAGS.use_tpu, + use_one_hot_embeddings=FLAGS.use_tpu) + + # If TPU is not available, this will fall back to normal Estimator on CPU + # or GPU. 
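+  # (With use_tpu=False the TPUEstimator behaves like a plain Estimator on
+  # CPU/GPU; the train/eval/predict batch sizes passed below reach each
+  # input_fn through params["batch_size"].)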
+ estimator = tf.contrib.tpu.TPUEstimator( + use_tpu=FLAGS.use_tpu, + model_fn=model_fn, + config=run_config, + train_batch_size=FLAGS.train_batch_size, + eval_batch_size=FLAGS.eval_batch_size, + predict_batch_size=FLAGS.predict_batch_size) + + if FLAGS.do_train: + train_file = os.path.join(FLAGS.output_dir, "train.tf_record") + file_based_convert_examples_to_features( + train_examples, label_list, FLAGS.max_seq_length, tokenizer, train_file) + tf.logging.info("***** Running training *****") + tf.logging.info(" Num examples = %d", len(train_examples)) + tf.logging.info(" Batch size = %d", FLAGS.train_batch_size) + tf.logging.info(" Num steps = %d", num_train_steps) + train_input_fn = file_based_input_fn_builder( + input_file=train_file, + seq_length=FLAGS.max_seq_length, + is_training=True, + drop_remainder=True) + estimator.train(input_fn=train_input_fn, max_steps=num_train_steps) + + if FLAGS.do_eval: + eval_examples = processor.get_dev_examples(FLAGS.data_dir) + num_actual_eval_examples = len(eval_examples) + if FLAGS.use_tpu: + # TPU requires a fixed batch size for all batches, therefore the number + # of examples must be a multiple of the batch size, or else examples + # will get dropped. So we pad with fake examples which are ignored + # later on. These do NOT count towards the metric (all tf.metrics + # support a per-instance weight, and these get a weight of 0.0). + while len(eval_examples) % FLAGS.eval_batch_size != 0: + eval_examples.append(PaddingInputExample()) + + eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record") + file_based_convert_examples_to_features( + eval_examples, label_list, FLAGS.max_seq_length, tokenizer, eval_file) + + tf.logging.info("***** Running evaluation *****") + tf.logging.info(" Num examples = %d (%d actual, %d padding)", + len(eval_examples), num_actual_eval_examples, + len(eval_examples) - num_actual_eval_examples) + tf.logging.info(" Batch size = %d", FLAGS.eval_batch_size) + + # This tells the estimator to run through the entire set. + eval_steps = None + # However, if running eval on the TPU, you will need to specify the + # number of steps. + if FLAGS.use_tpu: + assert len(eval_examples) % FLAGS.eval_batch_size == 0 + eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size) + + eval_drop_remainder = True if FLAGS.use_tpu else False + eval_input_fn = file_based_input_fn_builder( + input_file=eval_file, + seq_length=FLAGS.max_seq_length, + is_training=False, + drop_remainder=eval_drop_remainder) + + result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps) + + output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt") + with tf.gfile.GFile(output_eval_file, "w") as writer: + tf.logging.info("***** Eval results *****") + for key in sorted(result.keys()): + tf.logging.info(" %s = %s", key, str(result[key])) + writer.write("%s = %s\n" % (key, str(result[key]))) + + if FLAGS.do_predict: + predict_examples = processor.get_test_examples(FLAGS.data_dir) + num_actual_predict_examples = len(predict_examples) + if FLAGS.use_tpu: + # TPU requires a fixed batch size for all batches, therefore the number + # of examples must be a multiple of the batch size, or else examples + # will get dropped. So we pad with fake examples which are ignored + # later on. 
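+      # (For example, 25 test examples with predict_batch_size=8 are padded
+      # with 7 PaddingInputExamples up to 32; the padded rows are skipped
+      # again when the results are written out below.)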
+ while len(predict_examples) % FLAGS.predict_batch_size != 0: + predict_examples.append(PaddingInputExample()) + + predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record") + file_based_convert_examples_to_features(predict_examples, label_list, + FLAGS.max_seq_length, tokenizer, + predict_file) + + tf.logging.info("***** Running prediction*****") + tf.logging.info(" Num examples = %d (%d actual, %d padding)", + len(predict_examples), num_actual_predict_examples, + len(predict_examples) - num_actual_predict_examples) + tf.logging.info(" Batch size = %d", FLAGS.predict_batch_size) + + predict_drop_remainder = True if FLAGS.use_tpu else False + predict_input_fn = file_based_input_fn_builder( + input_file=predict_file, + seq_length=FLAGS.max_seq_length, + is_training=False, + drop_remainder=predict_drop_remainder) + + result = estimator.predict(input_fn=predict_input_fn) + + output_predict_file = os.path.join(FLAGS.output_dir, "test_results.tsv") + with tf.gfile.GFile(output_predict_file, "w") as writer: + num_written_lines = 0 + tf.logging.info("***** Predict results *****") + for (i, prediction) in enumerate(result): + probabilities = prediction["probabilities"] + if i >= num_actual_predict_examples: + break + output_line = "\t".join( + str(class_probability) + for class_probability in probabilities) + "\n" + writer.write(output_line) + num_written_lines += 1 + assert num_written_lines == num_actual_predict_examples + + +if __name__ == "__main__": + flags.mark_flag_as_required("data_dir") + flags.mark_flag_as_required("task_name") + flags.mark_flag_as_required("vocab_file") + flags.mark_flag_as_required("bert_config_file") + flags.mark_flag_as_required("output_dir") + tf.app.run() diff --git a/run_classifier_with_tfhub.py b/run_classifier_with_tfhub.py new file mode 100644 index 0000000..9d2f80f --- /dev/null +++ b/run_classifier_with_tfhub.py @@ -0,0 +1,314 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""BERT finetuning runner with TF-Hub.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import optimization +import run_classifier +import tokenization +import tensorflow as tf +import tensorflow_hub as hub + +flags = tf.flags + +FLAGS = flags.FLAGS + +flags.DEFINE_string( + "bert_hub_module_handle", None, + "Handle for the BERT TF-Hub module.") + + +def create_model(is_training, input_ids, input_mask, segment_ids, labels, + num_labels, bert_hub_module_handle): + """Creates a classification model.""" + tags = set() + if is_training: + tags.add("train") + bert_module = hub.Module(bert_hub_module_handle, tags=tags, trainable=True) + bert_inputs = dict( + input_ids=input_ids, + input_mask=input_mask, + segment_ids=segment_ids) + bert_outputs = bert_module( + inputs=bert_inputs, + signature="tokens", + as_dict=True) + + # In the demo, we are doing a simple classification task on the entire + # segment. 
+ # + # If you want to use the token-level output, use + # bert_outputs["sequence_output"] instead. + output_layer = bert_outputs["pooled_output"] + + hidden_size = output_layer.shape[-1].value + + output_weights = tf.get_variable( + "output_weights", [num_labels, hidden_size], + initializer=tf.truncated_normal_initializer(stddev=0.02)) + + output_bias = tf.get_variable( + "output_bias", [num_labels], initializer=tf.zeros_initializer()) + + with tf.variable_scope("loss"): + if is_training: + # I.e., 0.1 dropout + output_layer = tf.nn.dropout(output_layer, keep_prob=0.9) + + logits = tf.matmul(output_layer, output_weights, transpose_b=True) + logits = tf.nn.bias_add(logits, output_bias) + probabilities = tf.nn.softmax(logits, axis=-1) + log_probs = tf.nn.log_softmax(logits, axis=-1) + + one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32) + + per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1) + loss = tf.reduce_mean(per_example_loss) + + return (loss, per_example_loss, logits, probabilities) + + +def model_fn_builder(num_labels, learning_rate, num_train_steps, + num_warmup_steps, use_tpu, bert_hub_module_handle): + """Returns `model_fn` closure for TPUEstimator.""" + + def model_fn(features, labels, mode, params): # pylint: disable=unused-argument + """The `model_fn` for TPUEstimator.""" + + tf.logging.info("*** Features ***") + for name in sorted(features.keys()): + tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape)) + + input_ids = features["input_ids"] + input_mask = features["input_mask"] + segment_ids = features["segment_ids"] + label_ids = features["label_ids"] + + is_training = (mode == tf.estimator.ModeKeys.TRAIN) + + (total_loss, per_example_loss, logits, probabilities) = create_model( + is_training, input_ids, input_mask, segment_ids, label_ids, num_labels, + bert_hub_module_handle) + + output_spec = None + if mode == tf.estimator.ModeKeys.TRAIN: + train_op = optimization.create_optimizer( + total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu) + + output_spec = tf.contrib.tpu.TPUEstimatorSpec( + mode=mode, + loss=total_loss, + train_op=train_op) + elif mode == tf.estimator.ModeKeys.EVAL: + + def metric_fn(per_example_loss, label_ids, logits): + predictions = tf.argmax(logits, axis=-1, output_type=tf.int32) + accuracy = tf.metrics.accuracy(label_ids, predictions) + loss = tf.metrics.mean(per_example_loss) + return { + "eval_accuracy": accuracy, + "eval_loss": loss, + } + + eval_metrics = (metric_fn, [per_example_loss, label_ids, logits]) + output_spec = tf.contrib.tpu.TPUEstimatorSpec( + mode=mode, + loss=total_loss, + eval_metrics=eval_metrics) + elif mode == tf.estimator.ModeKeys.PREDICT: + output_spec = tf.contrib.tpu.TPUEstimatorSpec( + mode=mode, predictions={"probabilities": probabilities}) + else: + raise ValueError( + "Only TRAIN, EVAL and PREDICT modes are supported: %s" % (mode)) + + return output_spec + + return model_fn + + +def create_tokenizer_from_hub_module(bert_hub_module_handle): + """Get the vocab file and casing info from the Hub module.""" + with tf.Graph().as_default(): + bert_module = hub.Module(bert_hub_module_handle) + tokenization_info = bert_module(signature="tokenization_info", as_dict=True) + with tf.Session() as sess: + vocab_file, do_lower_case = sess.run([tokenization_info["vocab_file"], + tokenization_info["do_lower_case"]]) + return tokenization.FullTokenizer( + vocab_file=vocab_file, do_lower_case=do_lower_case) + + +def main(_): + 
tf.logging.set_verbosity(tf.logging.INFO) + + processors = { + "cola": run_classifier.ColaProcessor, + "mnli": run_classifier.MnliProcessor, + "mrpc": run_classifier.MrpcProcessor, + } + + if not FLAGS.do_train and not FLAGS.do_eval: + raise ValueError("At least one of `do_train` or `do_eval` must be True.") + + tf.gfile.MakeDirs(FLAGS.output_dir) + + task_name = FLAGS.task_name.lower() + + if task_name not in processors: + raise ValueError("Task not found: %s" % (task_name)) + + processor = processors[task_name]() + + label_list = processor.get_labels() + + tokenizer = create_tokenizer_from_hub_module(FLAGS.bert_hub_module_handle) + + tpu_cluster_resolver = None + if FLAGS.use_tpu and FLAGS.tpu_name: + tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver( + FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project) + + is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2 + run_config = tf.contrib.tpu.RunConfig( + cluster=tpu_cluster_resolver, + master=FLAGS.master, + model_dir=FLAGS.output_dir, + save_checkpoints_steps=FLAGS.save_checkpoints_steps, + tpu_config=tf.contrib.tpu.TPUConfig( + iterations_per_loop=FLAGS.iterations_per_loop, + num_shards=FLAGS.num_tpu_cores, + per_host_input_for_training=is_per_host)) + + train_examples = None + num_train_steps = None + num_warmup_steps = None + if FLAGS.do_train: + train_examples = processor.get_train_examples(FLAGS.data_dir) + num_train_steps = int( + len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs) + num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion) + + model_fn = model_fn_builder( + num_labels=len(label_list), + learning_rate=FLAGS.learning_rate, + num_train_steps=num_train_steps, + num_warmup_steps=num_warmup_steps, + use_tpu=FLAGS.use_tpu, + bert_hub_module_handle=FLAGS.bert_hub_module_handle) + + # If TPU is not available, this will fall back to normal Estimator on CPU + # or GPU. + estimator = tf.contrib.tpu.TPUEstimator( + use_tpu=FLAGS.use_tpu, + model_fn=model_fn, + config=run_config, + train_batch_size=FLAGS.train_batch_size, + eval_batch_size=FLAGS.eval_batch_size, + predict_batch_size=FLAGS.predict_batch_size) + + if FLAGS.do_train: + train_features = run_classifier.convert_examples_to_features( + train_examples, label_list, FLAGS.max_seq_length, tokenizer) + tf.logging.info("***** Running training *****") + tf.logging.info(" Num examples = %d", len(train_examples)) + tf.logging.info(" Batch size = %d", FLAGS.train_batch_size) + tf.logging.info(" Num steps = %d", num_train_steps) + train_input_fn = run_classifier.input_fn_builder( + features=train_features, + seq_length=FLAGS.max_seq_length, + is_training=True, + drop_remainder=True) + estimator.train(input_fn=train_input_fn, max_steps=num_train_steps) + + if FLAGS.do_eval: + eval_examples = processor.get_dev_examples(FLAGS.data_dir) + eval_features = run_classifier.convert_examples_to_features( + eval_examples, label_list, FLAGS.max_seq_length, tokenizer) + + tf.logging.info("***** Running evaluation *****") + tf.logging.info(" Num examples = %d", len(eval_examples)) + tf.logging.info(" Batch size = %d", FLAGS.eval_batch_size) + + # This tells the estimator to run through the entire set. + eval_steps = None + # However, if running eval on the TPU, you will need to specify the + # number of steps. + if FLAGS.use_tpu: + # Eval will be slightly WRONG on the TPU because it will truncate + # the last batch. 
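+      # (e.g. 103 eval examples with eval_batch_size=8 give eval_steps=12, so
+      # the last 7 examples are never scored.)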
+ eval_steps = int(len(eval_examples) / FLAGS.eval_batch_size) + + eval_drop_remainder = True if FLAGS.use_tpu else False + eval_input_fn = run_classifier.input_fn_builder( + features=eval_features, + seq_length=FLAGS.max_seq_length, + is_training=False, + drop_remainder=eval_drop_remainder) + + result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps) + + output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt") + with tf.gfile.GFile(output_eval_file, "w") as writer: + tf.logging.info("***** Eval results *****") + for key in sorted(result.keys()): + tf.logging.info(" %s = %s", key, str(result[key])) + writer.write("%s = %s\n" % (key, str(result[key]))) + + if FLAGS.do_predict: + predict_examples = processor.get_test_examples(FLAGS.data_dir) + if FLAGS.use_tpu: + # Discard batch remainder if running on TPU + n = len(predict_examples) + predict_examples = predict_examples[:(n - n % FLAGS.predict_batch_size)] + + predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record") + run_classifier.file_based_convert_examples_to_features( + predict_examples, label_list, FLAGS.max_seq_length, tokenizer, + predict_file) + + tf.logging.info("***** Running prediction*****") + tf.logging.info(" Num examples = %d", len(predict_examples)) + tf.logging.info(" Batch size = %d", FLAGS.predict_batch_size) + + predict_input_fn = run_classifier.file_based_input_fn_builder( + input_file=predict_file, + seq_length=FLAGS.max_seq_length, + is_training=False, + drop_remainder=FLAGS.use_tpu) + + result = estimator.predict(input_fn=predict_input_fn) + + output_predict_file = os.path.join(FLAGS.output_dir, "test_results.tsv") + with tf.gfile.GFile(output_predict_file, "w") as writer: + tf.logging.info("***** Predict results *****") + for prediction in result: + probabilities = prediction["probabilities"] + output_line = "\t".join( + str(class_probability) + for class_probability in probabilities) + "\n" + writer.write(output_line) + + +if __name__ == "__main__": + flags.mark_flag_as_required("data_dir") + flags.mark_flag_as_required("task_name") + flags.mark_flag_as_required("bert_hub_module_handle") + flags.mark_flag_as_required("output_dir") + tf.app.run() diff --git a/run_pretraining.py b/run_pretraining.py new file mode 100644 index 0000000..b118f62 --- /dev/null +++ b/run_pretraining.py @@ -0,0 +1,493 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Run masked LM/next sentence masked_lm pre-training for BERT.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import modeling +import optimization +import tensorflow as tf + +flags = tf.flags + +FLAGS = flags.FLAGS + +## Required parameters +flags.DEFINE_string( + "bert_config_file", None, + "The config json file corresponding to the pre-trained BERT model. 
" + "This specifies the model architecture.") + +flags.DEFINE_string( + "input_file", None, + "Input TF example files (can be a glob or comma separated).") + +flags.DEFINE_string( + "output_dir", None, + "The output directory where the model checkpoints will be written.") + +## Other parameters +flags.DEFINE_string( + "init_checkpoint", None, + "Initial checkpoint (usually from a pre-trained BERT model).") + +flags.DEFINE_integer( + "max_seq_length", 128, + "The maximum total input sequence length after WordPiece tokenization. " + "Sequences longer than this will be truncated, and sequences shorter " + "than this will be padded. Must match data generation.") + +flags.DEFINE_integer( + "max_predictions_per_seq", 20, + "Maximum number of masked LM predictions per sequence. " + "Must match data generation.") + +flags.DEFINE_bool("do_train", False, "Whether to run training.") + +flags.DEFINE_bool("do_eval", False, "Whether to run eval on the dev set.") + +flags.DEFINE_integer("train_batch_size", 32, "Total batch size for training.") + +flags.DEFINE_integer("eval_batch_size", 8, "Total batch size for eval.") + +flags.DEFINE_float("learning_rate", 5e-5, "The initial learning rate for Adam.") + +flags.DEFINE_integer("num_train_steps", 100000, "Number of training steps.") + +flags.DEFINE_integer("num_warmup_steps", 10000, "Number of warmup steps.") + +flags.DEFINE_integer("save_checkpoints_steps", 1000, + "How often to save the model checkpoint.") + +flags.DEFINE_integer("iterations_per_loop", 1000, + "How many steps to make in each estimator call.") + +flags.DEFINE_integer("max_eval_steps", 100, "Maximum number of eval steps.") + +flags.DEFINE_bool("use_tpu", False, "Whether to use TPU or GPU/CPU.") + +tf.flags.DEFINE_string( + "tpu_name", None, + "The Cloud TPU to use for training. This should be either the name " + "used when creating the Cloud TPU, or a grpc://ip.address.of.tpu:8470 " + "url.") + +tf.flags.DEFINE_string( + "tpu_zone", None, + "[Optional] GCE zone where the Cloud TPU is located in. If not " + "specified, we will attempt to automatically detect the GCE project from " + "metadata.") + +tf.flags.DEFINE_string( + "gcp_project", None, + "[Optional] Project name for the Cloud TPU-enabled project. If not " + "specified, we will attempt to automatically detect the GCE project from " + "metadata.") + +tf.flags.DEFINE_string("master", None, "[Optional] TensorFlow master URL.") + +flags.DEFINE_integer( + "num_tpu_cores", 8, + "Only used if `use_tpu` is True. 
Total number of TPU cores to use.") + + +def model_fn_builder(bert_config, init_checkpoint, learning_rate, + num_train_steps, num_warmup_steps, use_tpu, + use_one_hot_embeddings): + """Returns `model_fn` closure for TPUEstimator.""" + + def model_fn(features, labels, mode, params): # pylint: disable=unused-argument + """The `model_fn` for TPUEstimator.""" + + tf.logging.info("*** Features ***") + for name in sorted(features.keys()): + tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape)) + + input_ids = features["input_ids"] + input_mask = features["input_mask"] + segment_ids = features["segment_ids"] + masked_lm_positions = features["masked_lm_positions"] + masked_lm_ids = features["masked_lm_ids"] + masked_lm_weights = features["masked_lm_weights"] + next_sentence_labels = features["next_sentence_labels"] + + is_training = (mode == tf.estimator.ModeKeys.TRAIN) + + model = modeling.BertModel( + config=bert_config, + is_training=is_training, + input_ids=input_ids, + input_mask=input_mask, + token_type_ids=segment_ids, + use_one_hot_embeddings=use_one_hot_embeddings) + + (masked_lm_loss, + masked_lm_example_loss, masked_lm_log_probs) = get_masked_lm_output( + bert_config, model.get_sequence_output(), model.get_embedding_table(), + masked_lm_positions, masked_lm_ids, masked_lm_weights) + + (next_sentence_loss, next_sentence_example_loss, + next_sentence_log_probs) = get_next_sentence_output( + bert_config, model.get_pooled_output(), next_sentence_labels) + + total_loss = masked_lm_loss + next_sentence_loss + + tvars = tf.trainable_variables() + + initialized_variable_names = {} + scaffold_fn = None + if init_checkpoint: + (assignment_map, initialized_variable_names + ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint) + if use_tpu: + + def tpu_scaffold(): + tf.train.init_from_checkpoint(init_checkpoint, assignment_map) + return tf.train.Scaffold() + + scaffold_fn = tpu_scaffold + else: + tf.train.init_from_checkpoint(init_checkpoint, assignment_map) + + tf.logging.info("**** Trainable Variables ****") + for var in tvars: + init_string = "" + if var.name in initialized_variable_names: + init_string = ", *INIT_FROM_CKPT*" + tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape, + init_string) + + output_spec = None + if mode == tf.estimator.ModeKeys.TRAIN: + train_op = optimization.create_optimizer( + total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu) + + output_spec = tf.contrib.tpu.TPUEstimatorSpec( + mode=mode, + loss=total_loss, + train_op=train_op, + scaffold_fn=scaffold_fn) + elif mode == tf.estimator.ModeKeys.EVAL: + + def metric_fn(masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids, + masked_lm_weights, next_sentence_example_loss, + next_sentence_log_probs, next_sentence_labels): + """Computes the loss and accuracy of the model.""" + masked_lm_log_probs = tf.reshape(masked_lm_log_probs, + [-1, masked_lm_log_probs.shape[-1]]) + masked_lm_predictions = tf.argmax( + masked_lm_log_probs, axis=-1, output_type=tf.int32) + masked_lm_example_loss = tf.reshape(masked_lm_example_loss, [-1]) + masked_lm_ids = tf.reshape(masked_lm_ids, [-1]) + masked_lm_weights = tf.reshape(masked_lm_weights, [-1]) + masked_lm_accuracy = tf.metrics.accuracy( + labels=masked_lm_ids, + predictions=masked_lm_predictions, + weights=masked_lm_weights) + masked_lm_mean_loss = tf.metrics.mean( + values=masked_lm_example_loss, weights=masked_lm_weights) + + next_sentence_log_probs = tf.reshape( + next_sentence_log_probs, [-1, 
next_sentence_log_probs.shape[-1]]) + next_sentence_predictions = tf.argmax( + next_sentence_log_probs, axis=-1, output_type=tf.int32) + next_sentence_labels = tf.reshape(next_sentence_labels, [-1]) + next_sentence_accuracy = tf.metrics.accuracy( + labels=next_sentence_labels, predictions=next_sentence_predictions) + next_sentence_mean_loss = tf.metrics.mean( + values=next_sentence_example_loss) + + return { + "masked_lm_accuracy": masked_lm_accuracy, + "masked_lm_loss": masked_lm_mean_loss, + "next_sentence_accuracy": next_sentence_accuracy, + "next_sentence_loss": next_sentence_mean_loss, + } + + eval_metrics = (metric_fn, [ + masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids, + masked_lm_weights, next_sentence_example_loss, + next_sentence_log_probs, next_sentence_labels + ]) + output_spec = tf.contrib.tpu.TPUEstimatorSpec( + mode=mode, + loss=total_loss, + eval_metrics=eval_metrics, + scaffold_fn=scaffold_fn) + else: + raise ValueError("Only TRAIN and EVAL modes are supported: %s" % (mode)) + + return output_spec + + return model_fn + + +def get_masked_lm_output(bert_config, input_tensor, output_weights, positions, + label_ids, label_weights): + """Get loss and log probs for the masked LM.""" + input_tensor = gather_indexes(input_tensor, positions) + + with tf.variable_scope("cls/predictions"): + # We apply one more non-linear transformation before the output layer. + # This matrix is not used after pre-training. + with tf.variable_scope("transform"): + input_tensor = tf.layers.dense( + input_tensor, + units=bert_config.hidden_size, + activation=modeling.get_activation(bert_config.hidden_act), + kernel_initializer=modeling.create_initializer( + bert_config.initializer_range)) + input_tensor = modeling.layer_norm(input_tensor) + + # The output weights are the same as the input embeddings, but there is + # an output-only bias for each token. + output_bias = tf.get_variable( + "output_bias", + shape=[bert_config.vocab_size], + initializer=tf.zeros_initializer()) + logits = tf.matmul(input_tensor, output_weights, transpose_b=True) + logits = tf.nn.bias_add(logits, output_bias) + log_probs = tf.nn.log_softmax(logits, axis=-1) + + label_ids = tf.reshape(label_ids, [-1]) + label_weights = tf.reshape(label_weights, [-1]) + + one_hot_labels = tf.one_hot( + label_ids, depth=bert_config.vocab_size, dtype=tf.float32) + + # The `positions` tensor might be zero-padded (if the sequence is too + # short to have the maximum number of predictions). The `label_weights` + # tensor has a value of 1.0 for every real prediction and 0.0 for the + # padding predictions. + per_example_loss = -tf.reduce_sum(log_probs * one_hot_labels, axis=[-1]) + numerator = tf.reduce_sum(label_weights * per_example_loss) + denominator = tf.reduce_sum(label_weights) + 1e-5 + loss = numerator / denominator + + return (loss, per_example_loss, log_probs) + + +def get_next_sentence_output(bert_config, input_tensor, labels): + """Get loss and log probs for the next sentence prediction.""" + + # Simple binary classification. Note that 0 is "next sentence" and 1 is + # "random sentence". This weight matrix is not used after pre-training. 
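Before the next-sentence head below, the `label_weights` bookkeeping in `get_masked_lm_output` above is easiest to see with concrete numbers. The following is a minimal NumPy sketch (not part of the patch; the loss values are made up) of the same weighted-mean computation: padded prediction slots carry weight 0.0 and contribute nothing, and the small constant in the denominator guards against an all-padding edge case.

import numpy as np

# Hypothetical per-position cross-entropy losses for max_predictions_per_seq = 5;
# only the first three slots hold real masked tokens, the last two are padding.
per_example_loss = np.array([2.31, 0.87, 1.54, 3.02, 2.75])
label_weights = np.array([1.0, 1.0, 1.0, 0.0, 0.0])

numerator = np.sum(label_weights * per_example_loss)  # padding contributes 0.0
denominator = np.sum(label_weights) + 1e-5            # guards against an all-padding row
loss = numerator / denominator
print(round(loss, 3))  # ~1.573, the mean loss over the three real predictions only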
+ with tf.variable_scope("cls/seq_relationship"): + output_weights = tf.get_variable( + "output_weights", + shape=[2, bert_config.hidden_size], + initializer=modeling.create_initializer(bert_config.initializer_range)) + output_bias = tf.get_variable( + "output_bias", shape=[2], initializer=tf.zeros_initializer()) + + logits = tf.matmul(input_tensor, output_weights, transpose_b=True) + logits = tf.nn.bias_add(logits, output_bias) + log_probs = tf.nn.log_softmax(logits, axis=-1) + labels = tf.reshape(labels, [-1]) + one_hot_labels = tf.one_hot(labels, depth=2, dtype=tf.float32) + per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1) + loss = tf.reduce_mean(per_example_loss) + return (loss, per_example_loss, log_probs) + + +def gather_indexes(sequence_tensor, positions): + """Gathers the vectors at the specific positions over a minibatch.""" + sequence_shape = modeling.get_shape_list(sequence_tensor, expected_rank=3) + batch_size = sequence_shape[0] + seq_length = sequence_shape[1] + width = sequence_shape[2] + + flat_offsets = tf.reshape( + tf.range(0, batch_size, dtype=tf.int32) * seq_length, [-1, 1]) + flat_positions = tf.reshape(positions + flat_offsets, [-1]) + flat_sequence_tensor = tf.reshape(sequence_tensor, + [batch_size * seq_length, width]) + output_tensor = tf.gather(flat_sequence_tensor, flat_positions) + return output_tensor + + +def input_fn_builder(input_files, + max_seq_length, + max_predictions_per_seq, + is_training, + num_cpu_threads=4): + """Creates an `input_fn` closure to be passed to TPUEstimator.""" + + def input_fn(params): + """The actual input function.""" + batch_size = params["batch_size"] + + name_to_features = { + "input_ids": + tf.FixedLenFeature([max_seq_length], tf.int64), + "input_mask": + tf.FixedLenFeature([max_seq_length], tf.int64), + "segment_ids": + tf.FixedLenFeature([max_seq_length], tf.int64), + "masked_lm_positions": + tf.FixedLenFeature([max_predictions_per_seq], tf.int64), + "masked_lm_ids": + tf.FixedLenFeature([max_predictions_per_seq], tf.int64), + "masked_lm_weights": + tf.FixedLenFeature([max_predictions_per_seq], tf.float32), + "next_sentence_labels": + tf.FixedLenFeature([1], tf.int64), + } + + # For training, we want a lot of parallel reading and shuffling. + # For eval, we want no shuffling and parallel reading doesn't matter. + if is_training: + d = tf.data.Dataset.from_tensor_slices(tf.constant(input_files)) + d = d.repeat() + d = d.shuffle(buffer_size=len(input_files)) + + # `cycle_length` is the number of parallel files that get read. + cycle_length = min(num_cpu_threads, len(input_files)) + + # `sloppy` mode means that the interleaving is not exact. This adds + # even more randomness to the training pipeline. + d = d.apply( + tf.contrib.data.parallel_interleave( + tf.data.TFRecordDataset, + sloppy=is_training, + cycle_length=cycle_length)) + d = d.shuffle(buffer_size=100) + else: + d = tf.data.TFRecordDataset(input_files) + # Since we evaluate for a fixed number of steps we don't want to encounter + # out-of-range exceptions. + d = d.repeat() + + # We must `drop_remainder` on training because the TPU requires fixed + # size dimensions. For eval, we assume we are evaluating on the CPU or GPU + # and we *don't* want to drop the remainder, otherwise we wont cover + # every sample. 
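To see why `drop_remainder` is handled differently for training and eval, here is a small worked example (standalone sketch; the sizes are invented):

# Hypothetical evaluation set that does not divide evenly into batches.
num_eval_examples = 1003
eval_batch_size = 8

full_batches, leftover = divmod(num_eval_examples, eval_batch_size)
print(full_batches, leftover)  # 125 full batches, 3 leftover examples

# With drop_remainder=True (what the fixed-shape TPU path needs), the 3 leftover
# examples are silently skipped; with drop_remainder=False they arrive as one
# final batch of 3, so every example is evaluated exactly once.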
+ d = d.apply( + tf.contrib.data.map_and_batch( + lambda record: _decode_record(record, name_to_features), + batch_size=batch_size, + num_parallel_batches=num_cpu_threads, + drop_remainder=True)) + return d + + return input_fn + + +def _decode_record(record, name_to_features): + """Decodes a record to a TensorFlow example.""" + example = tf.parse_single_example(record, name_to_features) + + # tf.Example only supports tf.int64, but the TPU only supports tf.int32. + # So cast all int64 to int32. + for name in list(example.keys()): + t = example[name] + if t.dtype == tf.int64: + t = tf.to_int32(t) + example[name] = t + + return example + + +def main(_): + tf.logging.set_verbosity(tf.logging.INFO) + + if not FLAGS.do_train and not FLAGS.do_eval: + raise ValueError("At least one of `do_train` or `do_eval` must be True.") + + bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file) + + tf.gfile.MakeDirs(FLAGS.output_dir) + + input_files = [] + for input_pattern in FLAGS.input_file.split(","): + input_files.extend(tf.gfile.Glob(input_pattern)) + + tf.logging.info("*** Input Files ***") + for input_file in input_files: + tf.logging.info(" %s" % input_file) + + tpu_cluster_resolver = None + if FLAGS.use_tpu and FLAGS.tpu_name: + tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver( + FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project) + + is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2 + run_config = tf.contrib.tpu.RunConfig( + cluster=tpu_cluster_resolver, + master=FLAGS.master, + model_dir=FLAGS.output_dir, + save_checkpoints_steps=FLAGS.save_checkpoints_steps, + tpu_config=tf.contrib.tpu.TPUConfig( + iterations_per_loop=FLAGS.iterations_per_loop, + num_shards=FLAGS.num_tpu_cores, + per_host_input_for_training=is_per_host)) + + model_fn = model_fn_builder( + bert_config=bert_config, + init_checkpoint=FLAGS.init_checkpoint, + learning_rate=FLAGS.learning_rate, + num_train_steps=FLAGS.num_train_steps, + num_warmup_steps=FLAGS.num_warmup_steps, + use_tpu=FLAGS.use_tpu, + use_one_hot_embeddings=FLAGS.use_tpu) + + # If TPU is not available, this will fall back to normal Estimator on CPU + # or GPU. 
+ estimator = tf.contrib.tpu.TPUEstimator( + use_tpu=FLAGS.use_tpu, + model_fn=model_fn, + config=run_config, + train_batch_size=FLAGS.train_batch_size, + eval_batch_size=FLAGS.eval_batch_size) + + if FLAGS.do_train: + tf.logging.info("***** Running training *****") + tf.logging.info(" Batch size = %d", FLAGS.train_batch_size) + train_input_fn = input_fn_builder( + input_files=input_files, + max_seq_length=FLAGS.max_seq_length, + max_predictions_per_seq=FLAGS.max_predictions_per_seq, + is_training=True) + estimator.train(input_fn=train_input_fn, max_steps=FLAGS.num_train_steps) + + if FLAGS.do_eval: + tf.logging.info("***** Running evaluation *****") + tf.logging.info(" Batch size = %d", FLAGS.eval_batch_size) + + eval_input_fn = input_fn_builder( + input_files=input_files, + max_seq_length=FLAGS.max_seq_length, + max_predictions_per_seq=FLAGS.max_predictions_per_seq, + is_training=False) + + result = estimator.evaluate( + input_fn=eval_input_fn, steps=FLAGS.max_eval_steps) + + output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt") + with tf.gfile.GFile(output_eval_file, "w") as writer: + tf.logging.info("***** Eval results *****") + for key in sorted(result.keys()): + tf.logging.info(" %s = %s", key, str(result[key])) + writer.write("%s = %s\n" % (key, str(result[key]))) + + +if __name__ == "__main__": + flags.mark_flag_as_required("input_file") + flags.mark_flag_as_required("bert_config_file") + flags.mark_flag_as_required("output_dir") + tf.app.run() diff --git a/run_squad.py b/run_squad.py new file mode 100644 index 0000000..edd4c3e --- /dev/null +++ b/run_squad.py @@ -0,0 +1,1283 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Run BERT on SQuAD 1.1 and SQuAD 2.0.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import json +import math +import os +import random +import modeling +import optimization +import tokenization +import six +import tensorflow as tf + +flags = tf.flags + +FLAGS = flags.FLAGS + +## Required parameters +flags.DEFINE_string( + "bert_config_file", None, + "The config json file corresponding to the pre-trained BERT model. " + "This specifies the model architecture.") + +flags.DEFINE_string("vocab_file", None, + "The vocabulary file that the BERT model was trained on.") + +flags.DEFINE_string( + "output_dir", None, + "The output directory where the model checkpoints will be written.") + +## Other parameters +flags.DEFINE_string("train_file", None, + "SQuAD json for training. E.g., train-v1.1.json") + +flags.DEFINE_string( + "predict_file", None, + "SQuAD json for predictions. E.g., dev-v1.1.json or test-v1.1.json") + +flags.DEFINE_string( + "init_checkpoint", None, + "Initial checkpoint (usually from a pre-trained BERT model).") + +flags.DEFINE_bool( + "do_lower_case", True, + "Whether to lower case the input text. 
Should be True for uncased " + "models and False for cased models.") + +flags.DEFINE_integer( + "max_seq_length", 384, + "The maximum total input sequence length after WordPiece tokenization. " + "Sequences longer than this will be truncated, and sequences shorter " + "than this will be padded.") + +flags.DEFINE_integer( + "doc_stride", 128, + "When splitting up a long document into chunks, how much stride to " + "take between chunks.") + +flags.DEFINE_integer( + "max_query_length", 64, + "The maximum number of tokens for the question. Questions longer than " + "this will be truncated to this length.") + +flags.DEFINE_bool("do_train", False, "Whether to run training.") + +flags.DEFINE_bool("do_predict", False, "Whether to run eval on the dev set.") + +flags.DEFINE_integer("train_batch_size", 32, "Total batch size for training.") + +flags.DEFINE_integer("predict_batch_size", 8, + "Total batch size for predictions.") + +flags.DEFINE_float("learning_rate", 5e-5, "The initial learning rate for Adam.") + +flags.DEFINE_float("num_train_epochs", 3.0, + "Total number of training epochs to perform.") + +flags.DEFINE_float( + "warmup_proportion", 0.1, + "Proportion of training to perform linear learning rate warmup for. " + "E.g., 0.1 = 10% of training.") + +flags.DEFINE_integer("save_checkpoints_steps", 1000, + "How often to save the model checkpoint.") + +flags.DEFINE_integer("iterations_per_loop", 1000, + "How many steps to make in each estimator call.") + +flags.DEFINE_integer( + "n_best_size", 20, + "The total number of n-best predictions to generate in the " + "nbest_predictions.json output file.") + +flags.DEFINE_integer( + "max_answer_length", 30, + "The maximum length of an answer that can be generated. This is needed " + "because the start and end predictions are not conditioned on one another.") + +flags.DEFINE_bool("use_tpu", False, "Whether to use TPU or GPU/CPU.") + +tf.flags.DEFINE_string( + "tpu_name", None, + "The Cloud TPU to use for training. This should be either the name " + "used when creating the Cloud TPU, or a grpc://ip.address.of.tpu:8470 " + "url.") + +tf.flags.DEFINE_string( + "tpu_zone", None, + "[Optional] GCE zone where the Cloud TPU is located in. If not " + "specified, we will attempt to automatically detect the GCE project from " + "metadata.") + +tf.flags.DEFINE_string( + "gcp_project", None, + "[Optional] Project name for the Cloud TPU-enabled project. If not " + "specified, we will attempt to automatically detect the GCE project from " + "metadata.") + +tf.flags.DEFINE_string("master", None, "[Optional] TensorFlow master URL.") + +flags.DEFINE_integer( + "num_tpu_cores", 8, + "Only used if `use_tpu` is True. Total number of TPU cores to use.") + +flags.DEFINE_bool( + "verbose_logging", False, + "If true, all of the warnings related to data processing will be printed. " + "A number of warnings are expected for a normal SQuAD evaluation.") + +flags.DEFINE_bool( + "version_2_with_negative", False, + "If true, the SQuAD examples contain some that do not have an answer.") + +flags.DEFINE_float( + "null_score_diff_threshold", 0.0, + "If null_score - best_non_null is greater than the threshold predict null.") + + +class SquadExample(object): + """A single training/test example for simple sequence classification. + + For examples without an answer, the start and end position are -1. 
+ """ + + def __init__(self, + qas_id, + question_text, + doc_tokens, + orig_answer_text=None, + start_position=None, + end_position=None, + is_impossible=False): + self.qas_id = qas_id + self.question_text = question_text + self.doc_tokens = doc_tokens + self.orig_answer_text = orig_answer_text + self.start_position = start_position + self.end_position = end_position + self.is_impossible = is_impossible + + def __str__(self): + return self.__repr__() + + def __repr__(self): + s = "" + s += "qas_id: %s" % (tokenization.printable_text(self.qas_id)) + s += ", question_text: %s" % ( + tokenization.printable_text(self.question_text)) + s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens)) + if self.start_position: + s += ", start_position: %d" % (self.start_position) + if self.start_position: + s += ", end_position: %d" % (self.end_position) + if self.start_position: + s += ", is_impossible: %r" % (self.is_impossible) + return s + + +class InputFeatures(object): + """A single set of features of data.""" + + def __init__(self, + unique_id, + example_index, + doc_span_index, + tokens, + token_to_orig_map, + token_is_max_context, + input_ids, + input_mask, + segment_ids, + start_position=None, + end_position=None, + is_impossible=None): + self.unique_id = unique_id + self.example_index = example_index + self.doc_span_index = doc_span_index + self.tokens = tokens + self.token_to_orig_map = token_to_orig_map + self.token_is_max_context = token_is_max_context + self.input_ids = input_ids + self.input_mask = input_mask + self.segment_ids = segment_ids + self.start_position = start_position + self.end_position = end_position + self.is_impossible = is_impossible + + +def read_squad_examples(input_file, is_training): + """Read a SQuAD json file into a list of SquadExample.""" + with tf.gfile.Open(input_file, "r") as reader: + input_data = json.load(reader)["data"] + + def is_whitespace(c): + if c == " " or c == "\t" or c == "\r" or c == "\n" or ord(c) == 0x202F: + return True + return False + + examples = [] + for entry in input_data: + for paragraph in entry["paragraphs"]: + paragraph_text = paragraph["context"] + doc_tokens = [] + char_to_word_offset = [] + prev_is_whitespace = True + for c in paragraph_text: + if is_whitespace(c): + prev_is_whitespace = True + else: + if prev_is_whitespace: + doc_tokens.append(c) + else: + doc_tokens[-1] += c + prev_is_whitespace = False + char_to_word_offset.append(len(doc_tokens) - 1) + + for qa in paragraph["qas"]: + qas_id = qa["id"] + question_text = qa["question"] + start_position = None + end_position = None + orig_answer_text = None + is_impossible = False + if is_training: + + if FLAGS.version_2_with_negative: + is_impossible = qa["is_impossible"] + if (len(qa["answers"]) != 1) and (not is_impossible): + raise ValueError( + "For training, each question should have exactly 1 answer.") + if not is_impossible: + answer = qa["answers"][0] + orig_answer_text = answer["text"] + answer_offset = answer["answer_start"] + answer_length = len(orig_answer_text) + start_position = char_to_word_offset[answer_offset] + end_position = char_to_word_offset[answer_offset + answer_length - + 1] + # Only add answers where the text can be exactly recovered from the + # document. If this CAN'T happen it's likely due to weird Unicode + # stuff so we will just skip the example. + # + # Note that this means for training mode, every example is NOT + # guaranteed to be preserved. 
+ actual_text = " ".join( + doc_tokens[start_position:(end_position + 1)]) + cleaned_answer_text = " ".join( + tokenization.whitespace_tokenize(orig_answer_text)) + if actual_text.find(cleaned_answer_text) == -1: + tf.logging.warning("Could not find answer: '%s' vs. '%s'", + actual_text, cleaned_answer_text) + continue + else: + start_position = -1 + end_position = -1 + orig_answer_text = "" + + example = SquadExample( + qas_id=qas_id, + question_text=question_text, + doc_tokens=doc_tokens, + orig_answer_text=orig_answer_text, + start_position=start_position, + end_position=end_position, + is_impossible=is_impossible) + examples.append(example) + + return examples + + +def convert_examples_to_features(examples, tokenizer, max_seq_length, + doc_stride, max_query_length, is_training, + output_fn): + """Loads a data file into a list of `InputBatch`s.""" + + unique_id = 1000000000 + + for (example_index, example) in enumerate(examples): + query_tokens = tokenizer.tokenize(example.question_text) + + if len(query_tokens) > max_query_length: + query_tokens = query_tokens[0:max_query_length] + + tok_to_orig_index = [] + orig_to_tok_index = [] + all_doc_tokens = [] + for (i, token) in enumerate(example.doc_tokens): + orig_to_tok_index.append(len(all_doc_tokens)) + sub_tokens = tokenizer.tokenize(token) + for sub_token in sub_tokens: + tok_to_orig_index.append(i) + all_doc_tokens.append(sub_token) + + tok_start_position = None + tok_end_position = None + if is_training and example.is_impossible: + tok_start_position = -1 + tok_end_position = -1 + if is_training and not example.is_impossible: + tok_start_position = orig_to_tok_index[example.start_position] + if example.end_position < len(example.doc_tokens) - 1: + tok_end_position = orig_to_tok_index[example.end_position + 1] - 1 + else: + tok_end_position = len(all_doc_tokens) - 1 + (tok_start_position, tok_end_position) = _improve_answer_span( + all_doc_tokens, tok_start_position, tok_end_position, tokenizer, + example.orig_answer_text) + + # The -3 accounts for [CLS], [SEP] and [SEP] + max_tokens_for_doc = max_seq_length - len(query_tokens) - 3 + + # We can have documents that are longer than the maximum sequence length. + # To deal with this we do a sliding window approach, where we take chunks + # of the up to our max length with a stride of `doc_stride`. 
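As a concrete illustration of the sliding-window chunking implemented just below, here is a standalone helper that mirrors that loop; the document length, question length, and stride are made up:

import collections

_DocSpan = collections.namedtuple("DocSpan", ["start", "length"])

def make_doc_spans(num_doc_tokens, max_tokens_for_doc, doc_stride):
    # Mirrors the span-generation loop below.
    doc_spans = []
    start_offset = 0
    while start_offset < num_doc_tokens:
        length = min(num_doc_tokens - start_offset, max_tokens_for_doc)
        doc_spans.append(_DocSpan(start=start_offset, length=length))
        if start_offset + length == num_doc_tokens:
            break
        start_offset += min(length, doc_stride)
    return doc_spans

# A 500-WordPiece document with max_seq_length=384 and a 13-token question:
# max_tokens_for_doc = 384 - 13 - 3 = 368.
print(make_doc_spans(500, 368, 128))
# [DocSpan(start=0, length=368), DocSpan(start=128, length=368), DocSpan(start=256, length=244)]
# Consecutive windows overlap by 368 - 128 = 240 tokens.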
+ _DocSpan = collections.namedtuple( # pylint: disable=invalid-name + "DocSpan", ["start", "length"]) + doc_spans = [] + start_offset = 0 + while start_offset < len(all_doc_tokens): + length = len(all_doc_tokens) - start_offset + if length > max_tokens_for_doc: + length = max_tokens_for_doc + doc_spans.append(_DocSpan(start=start_offset, length=length)) + if start_offset + length == len(all_doc_tokens): + break + start_offset += min(length, doc_stride) + + for (doc_span_index, doc_span) in enumerate(doc_spans): + tokens = [] + token_to_orig_map = {} + token_is_max_context = {} + segment_ids = [] + tokens.append("[CLS]") + segment_ids.append(0) + for token in query_tokens: + tokens.append(token) + segment_ids.append(0) + tokens.append("[SEP]") + segment_ids.append(0) + + for i in range(doc_span.length): + split_token_index = doc_span.start + i + token_to_orig_map[len(tokens)] = tok_to_orig_index[split_token_index] + + is_max_context = _check_is_max_context(doc_spans, doc_span_index, + split_token_index) + token_is_max_context[len(tokens)] = is_max_context + tokens.append(all_doc_tokens[split_token_index]) + segment_ids.append(1) + tokens.append("[SEP]") + segment_ids.append(1) + + input_ids = tokenizer.convert_tokens_to_ids(tokens) + + # The mask has 1 for real tokens and 0 for padding tokens. Only real + # tokens are attended to. + input_mask = [1] * len(input_ids) + + # Zero-pad up to the sequence length. + while len(input_ids) < max_seq_length: + input_ids.append(0) + input_mask.append(0) + segment_ids.append(0) + + assert len(input_ids) == max_seq_length + assert len(input_mask) == max_seq_length + assert len(segment_ids) == max_seq_length + + start_position = None + end_position = None + if is_training and not example.is_impossible: + # For training, if our document chunk does not contain an annotation + # we throw it out, since there is nothing to predict. 
+ doc_start = doc_span.start
+ doc_end = doc_span.start + doc_span.length - 1
+ out_of_span = False
+ if not (tok_start_position >= doc_start and
+ tok_end_position <= doc_end):
+ out_of_span = True
+ if out_of_span:
+ start_position = 0
+ end_position = 0
+ else:
+ doc_offset = len(query_tokens) + 2
+ start_position = tok_start_position - doc_start + doc_offset
+ end_position = tok_end_position - doc_start + doc_offset
+
+ if is_training and example.is_impossible:
+ start_position = 0
+ end_position = 0
+
+ if example_index < 20:
+ tf.logging.info("*** Example ***")
+ tf.logging.info("unique_id: %s" % (unique_id))
+ tf.logging.info("example_index: %s" % (example_index))
+ tf.logging.info("doc_span_index: %s" % (doc_span_index))
+ tf.logging.info("tokens: %s" % " ".join(
+ [tokenization.printable_text(x) for x in tokens]))
+ tf.logging.info("token_to_orig_map: %s" % " ".join(
+ ["%d:%d" % (x, y) for (x, y) in six.iteritems(token_to_orig_map)]))
+ tf.logging.info("token_is_max_context: %s" % " ".join([
+ "%d:%s" % (x, y) for (x, y) in six.iteritems(token_is_max_context)
+ ]))
+ tf.logging.info("input_ids: %s" % " ".join([str(x) for x in input_ids]))
+ tf.logging.info(
+ "input_mask: %s" % " ".join([str(x) for x in input_mask]))
+ tf.logging.info(
+ "segment_ids: %s" % " ".join([str(x) for x in segment_ids]))
+ if is_training and example.is_impossible:
+ tf.logging.info("impossible example")
+ if is_training and not example.is_impossible:
+ answer_text = " ".join(tokens[start_position:(end_position + 1)])
+ tf.logging.info("start_position: %d" % (start_position))
+ tf.logging.info("end_position: %d" % (end_position))
+ tf.logging.info(
+ "answer: %s" % (tokenization.printable_text(answer_text)))
+
+ feature = InputFeatures(
+ unique_id=unique_id,
+ example_index=example_index,
+ doc_span_index=doc_span_index,
+ tokens=tokens,
+ token_to_orig_map=token_to_orig_map,
+ token_is_max_context=token_is_max_context,
+ input_ids=input_ids,
+ input_mask=input_mask,
+ segment_ids=segment_ids,
+ start_position=start_position,
+ end_position=end_position,
+ is_impossible=example.is_impossible)
+
+ # Run callback
+ output_fn(feature)
+
+ unique_id += 1
+
+
+def _improve_answer_span(doc_tokens, input_start, input_end, tokenizer,
+ orig_answer_text):
+ """Returns tokenized answer spans that better match the annotated answer."""
+
+ # The SQuAD annotations are character based. We first project them to
+ # whitespace-tokenized words. But then after WordPiece tokenization, we can
+ # often find a "better match". For example:
+ #
+ # Question: What year was John Smith born?
+ # Context: The leader was John Smith (1895-1943).
+ # Answer: 1895
+ #
+ # The original whitespace-tokenized answer will be "(1895-1943).". However
+ # after tokenization, our tokens will be "( 1895 - 1943 ) .". So we can match
+ # the exact answer, 1895.
+ #
+ # However, this is not always possible. Consider the following:
+ #
+ # Question: What country is the top exporter of electronics?
+ # Context: The Japanese electronics industry is the largest in the world.
+ # Answer: Japan
+ #
+ # In this case, the annotator chose "Japan" as a character sub-span of
+ # the word "Japanese". Since our WordPiece tokenizer does not split
+ # "Japanese", we just use "Japanese" as the annotation. This is fairly rare
+ # in SQuAD, but does happen.
+ tok_answer_text = " ".join(tokenizer.tokenize(orig_answer_text)) + + for new_start in range(input_start, input_end + 1): + for new_end in range(input_end, new_start - 1, -1): + text_span = " ".join(doc_tokens[new_start:(new_end + 1)]) + if text_span == tok_answer_text: + return (new_start, new_end) + + return (input_start, input_end) + + +def _check_is_max_context(doc_spans, cur_span_index, position): + """Check if this is the 'max context' doc span for the token.""" + + # Because of the sliding window approach taken to scoring documents, a single + # token can appear in multiple documents. E.g. + # Doc: the man went to the store and bought a gallon of milk + # Span A: the man went to the + # Span B: to the store and bought + # Span C: and bought a gallon of + # ... + # + # Now the word 'bought' will have two scores from spans B and C. We only + # want to consider the score with "maximum context", which we define as + # the *minimum* of its left and right context (the *sum* of left and + # right context will always be the same, of course). + # + # In the example the maximum context for 'bought' would be span C since + # it has 1 left context and 3 right context, while span B has 4 left context + # and 0 right context. + best_score = None + best_span_index = None + for (span_index, doc_span) in enumerate(doc_spans): + end = doc_span.start + doc_span.length - 1 + if position < doc_span.start: + continue + if position > end: + continue + num_left_context = position - doc_span.start + num_right_context = end - position + score = min(num_left_context, num_right_context) + 0.01 * doc_span.length + if best_score is None or score > best_score: + best_score = score + best_span_index = span_index + + return cur_span_index == best_span_index + + +def create_model(bert_config, is_training, input_ids, input_mask, segment_ids, + use_one_hot_embeddings): + """Creates a classification model.""" + model = modeling.BertModel( + config=bert_config, + is_training=is_training, + input_ids=input_ids, + input_mask=input_mask, + token_type_ids=segment_ids, + use_one_hot_embeddings=use_one_hot_embeddings) + + final_hidden = model.get_sequence_output() + + final_hidden_shape = modeling.get_shape_list(final_hidden, expected_rank=3) + batch_size = final_hidden_shape[0] + seq_length = final_hidden_shape[1] + hidden_size = final_hidden_shape[2] + + output_weights = tf.get_variable( + "cls/squad/output_weights", [2, hidden_size], + initializer=tf.truncated_normal_initializer(stddev=0.02)) + + output_bias = tf.get_variable( + "cls/squad/output_bias", [2], initializer=tf.zeros_initializer()) + + final_hidden_matrix = tf.reshape(final_hidden, + [batch_size * seq_length, hidden_size]) + logits = tf.matmul(final_hidden_matrix, output_weights, transpose_b=True) + logits = tf.nn.bias_add(logits, output_bias) + + logits = tf.reshape(logits, [batch_size, seq_length, 2]) + logits = tf.transpose(logits, [2, 0, 1]) + + unstacked_logits = tf.unstack(logits, axis=0) + + (start_logits, end_logits) = (unstacked_logits[0], unstacked_logits[1]) + + return (start_logits, end_logits) + + +def model_fn_builder(bert_config, init_checkpoint, learning_rate, + num_train_steps, num_warmup_steps, use_tpu, + use_one_hot_embeddings): + """Returns `model_fn` closure for TPUEstimator.""" + + def model_fn(features, labels, mode, params): # pylint: disable=unused-argument + """The `model_fn` for TPUEstimator.""" + + tf.logging.info("*** Features ***") + for name in sorted(features.keys()): + tf.logging.info(" name = %s, shape = %s" % (name, 
features[name].shape)) + + unique_ids = features["unique_ids"] + input_ids = features["input_ids"] + input_mask = features["input_mask"] + segment_ids = features["segment_ids"] + + is_training = (mode == tf.estimator.ModeKeys.TRAIN) + + (start_logits, end_logits) = create_model( + bert_config=bert_config, + is_training=is_training, + input_ids=input_ids, + input_mask=input_mask, + segment_ids=segment_ids, + use_one_hot_embeddings=use_one_hot_embeddings) + + tvars = tf.trainable_variables() + + initialized_variable_names = {} + scaffold_fn = None + if init_checkpoint: + (assignment_map, initialized_variable_names + ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint) + if use_tpu: + + def tpu_scaffold(): + tf.train.init_from_checkpoint(init_checkpoint, assignment_map) + return tf.train.Scaffold() + + scaffold_fn = tpu_scaffold + else: + tf.train.init_from_checkpoint(init_checkpoint, assignment_map) + + tf.logging.info("**** Trainable Variables ****") + for var in tvars: + init_string = "" + if var.name in initialized_variable_names: + init_string = ", *INIT_FROM_CKPT*" + tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape, + init_string) + + output_spec = None + if mode == tf.estimator.ModeKeys.TRAIN: + seq_length = modeling.get_shape_list(input_ids)[1] + + def compute_loss(logits, positions): + one_hot_positions = tf.one_hot( + positions, depth=seq_length, dtype=tf.float32) + log_probs = tf.nn.log_softmax(logits, axis=-1) + loss = -tf.reduce_mean( + tf.reduce_sum(one_hot_positions * log_probs, axis=-1)) + return loss + + start_positions = features["start_positions"] + end_positions = features["end_positions"] + + start_loss = compute_loss(start_logits, start_positions) + end_loss = compute_loss(end_logits, end_positions) + + total_loss = (start_loss + end_loss) / 2.0 + + train_op = optimization.create_optimizer( + total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu) + + output_spec = tf.contrib.tpu.TPUEstimatorSpec( + mode=mode, + loss=total_loss, + train_op=train_op, + scaffold_fn=scaffold_fn) + elif mode == tf.estimator.ModeKeys.PREDICT: + predictions = { + "unique_ids": unique_ids, + "start_logits": start_logits, + "end_logits": end_logits, + } + output_spec = tf.contrib.tpu.TPUEstimatorSpec( + mode=mode, predictions=predictions, scaffold_fn=scaffold_fn) + else: + raise ValueError( + "Only TRAIN and PREDICT modes are supported: %s" % (mode)) + + return output_spec + + return model_fn + + +def input_fn_builder(input_file, seq_length, is_training, drop_remainder): + """Creates an `input_fn` closure to be passed to TPUEstimator.""" + + name_to_features = { + "unique_ids": tf.FixedLenFeature([], tf.int64), + "input_ids": tf.FixedLenFeature([seq_length], tf.int64), + "input_mask": tf.FixedLenFeature([seq_length], tf.int64), + "segment_ids": tf.FixedLenFeature([seq_length], tf.int64), + } + + if is_training: + name_to_features["start_positions"] = tf.FixedLenFeature([], tf.int64) + name_to_features["end_positions"] = tf.FixedLenFeature([], tf.int64) + + def _decode_record(record, name_to_features): + """Decodes a record to a TensorFlow example.""" + example = tf.parse_single_example(record, name_to_features) + + # tf.Example only supports tf.int64, but the TPU only supports tf.int32. + # So cast all int64 to int32. 
+ for name in list(example.keys()): + t = example[name] + if t.dtype == tf.int64: + t = tf.to_int32(t) + example[name] = t + + return example + + def input_fn(params): + """The actual input function.""" + batch_size = params["batch_size"] + + # For training, we want a lot of parallel reading and shuffling. + # For eval, we want no shuffling and parallel reading doesn't matter. + d = tf.data.TFRecordDataset(input_file) + if is_training: + d = d.repeat() + d = d.shuffle(buffer_size=100) + + d = d.apply( + tf.contrib.data.map_and_batch( + lambda record: _decode_record(record, name_to_features), + batch_size=batch_size, + drop_remainder=drop_remainder)) + + return d + + return input_fn + + +RawResult = collections.namedtuple("RawResult", + ["unique_id", "start_logits", "end_logits"]) + + +def write_predictions(all_examples, all_features, all_results, n_best_size, + max_answer_length, do_lower_case, output_prediction_file, + output_nbest_file, output_null_log_odds_file): + """Write final predictions to the json file and log-odds of null if needed.""" + tf.logging.info("Writing predictions to: %s" % (output_prediction_file)) + tf.logging.info("Writing nbest to: %s" % (output_nbest_file)) + + example_index_to_features = collections.defaultdict(list) + for feature in all_features: + example_index_to_features[feature.example_index].append(feature) + + unique_id_to_result = {} + for result in all_results: + unique_id_to_result[result.unique_id] = result + + _PrelimPrediction = collections.namedtuple( # pylint: disable=invalid-name + "PrelimPrediction", + ["feature_index", "start_index", "end_index", "start_logit", "end_logit"]) + + all_predictions = collections.OrderedDict() + all_nbest_json = collections.OrderedDict() + scores_diff_json = collections.OrderedDict() + + for (example_index, example) in enumerate(all_examples): + features = example_index_to_features[example_index] + + prelim_predictions = [] + # keep track of the minimum score of null start+end of position 0 + score_null = 1000000 # large and positive + min_null_feature_index = 0 # the paragraph slice with min mull score + null_start_logit = 0 # the start logit at the slice with min null score + null_end_logit = 0 # the end logit at the slice with min null score + for (feature_index, feature) in enumerate(features): + result = unique_id_to_result[feature.unique_id] + start_indexes = _get_best_indexes(result.start_logits, n_best_size) + end_indexes = _get_best_indexes(result.end_logits, n_best_size) + # if we could have irrelevant answers, get the min score of irrelevant + if FLAGS.version_2_with_negative: + feature_null_score = result.start_logits[0] + result.end_logits[0] + if feature_null_score < score_null: + score_null = feature_null_score + min_null_feature_index = feature_index + null_start_logit = result.start_logits[0] + null_end_logit = result.end_logits[0] + for start_index in start_indexes: + for end_index in end_indexes: + # We could hypothetically create invalid predictions, e.g., predict + # that the start of the span is in the question. We throw out all + # invalid predictions. 
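A toy sketch of the filtering that the loop below applies to candidate (start, end) pairs; the token indices and top-k orderings are invented, and only the document tokens (here indices 4..7) appear in `token_to_orig_map`:

# Toy feature: tokens = [CLS] q1 q2 [SEP] d1 d2 d3 d4 [SEP]
token_to_orig_map = {4: 0, 5: 1, 6: 2, 7: 3}
start_indexes = [5, 1, 7]  # invented top-3 start positions, best logit first
end_indexes = [6, 5, 2]    # invented top-3 end positions

for start_index in start_indexes:
    for end_index in end_indexes:
        if start_index not in token_to_orig_map or end_index not in token_to_orig_map:
            print((start_index, end_index), "dropped: falls in the question or special tokens")
        elif end_index < start_index:
            print((start_index, end_index), "dropped: end before start")
        else:
            print((start_index, end_index), "kept as a preliminary prediction")
# Only (5, 6) and (5, 5) survive; the real loop additionally checks
# token_is_max_context and max_answer_length.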
+ if start_index >= len(feature.tokens):
+ continue
+ if end_index >= len(feature.tokens):
+ continue
+ if start_index not in feature.token_to_orig_map:
+ continue
+ if end_index not in feature.token_to_orig_map:
+ continue
+ if not feature.token_is_max_context.get(start_index, False):
+ continue
+ if end_index < start_index:
+ continue
+ length = end_index - start_index + 1
+ if length > max_answer_length:
+ continue
+ prelim_predictions.append(
+ _PrelimPrediction(
+ feature_index=feature_index,
+ start_index=start_index,
+ end_index=end_index,
+ start_logit=result.start_logits[start_index],
+ end_logit=result.end_logits[end_index]))
+
+ if FLAGS.version_2_with_negative:
+ prelim_predictions.append(
+ _PrelimPrediction(
+ feature_index=min_null_feature_index,
+ start_index=0,
+ end_index=0,
+ start_logit=null_start_logit,
+ end_logit=null_end_logit))
+ prelim_predictions = sorted(
+ prelim_predictions,
+ key=lambda x: (x.start_logit + x.end_logit),
+ reverse=True)
+
+ _NbestPrediction = collections.namedtuple( # pylint: disable=invalid-name
+ "NbestPrediction", ["text", "start_logit", "end_logit"])
+
+ seen_predictions = {}
+ nbest = []
+ for pred in prelim_predictions:
+ if len(nbest) >= n_best_size:
+ break
+ feature = features[pred.feature_index]
+ if pred.start_index > 0: # this is a non-null prediction
+ tok_tokens = feature.tokens[pred.start_index:(pred.end_index + 1)]
+ orig_doc_start = feature.token_to_orig_map[pred.start_index]
+ orig_doc_end = feature.token_to_orig_map[pred.end_index]
+ orig_tokens = example.doc_tokens[orig_doc_start:(orig_doc_end + 1)]
+ tok_text = " ".join(tok_tokens)
+
+ # De-tokenize WordPieces that have been split off.
+ tok_text = tok_text.replace(" ##", "")
+ tok_text = tok_text.replace("##", "")
+
+ # Clean whitespace
+ tok_text = tok_text.strip()
+ tok_text = " ".join(tok_text.split())
+ orig_text = " ".join(orig_tokens)
+
+ final_text = get_final_text(tok_text, orig_text, do_lower_case)
+ if final_text in seen_predictions:
+ continue
+
+ seen_predictions[final_text] = True
+ else:
+ final_text = ""
+ seen_predictions[final_text] = True
+
+ nbest.append(
+ _NbestPrediction(
+ text=final_text,
+ start_logit=pred.start_logit,
+ end_logit=pred.end_logit))
+
+ # if we didn't include the empty option in the n-best, include it
+ if FLAGS.version_2_with_negative:
+ if "" not in seen_predictions:
+ nbest.append(
+ _NbestPrediction(
+ text="", start_logit=null_start_logit,
+ end_logit=null_end_logit))
+ # In very rare edge cases we could have no valid predictions. So we
+ # just create a nonce prediction in this case to avoid failure.
+ if not nbest:
+ nbest.append(
+ _NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0))
+
+ assert len(nbest) >= 1
+
+ total_scores = []
+ best_non_null_entry = None
+ for entry in nbest:
+ total_scores.append(entry.start_logit + entry.end_logit)
+ if not best_non_null_entry:
+ if entry.text:
+ best_non_null_entry = entry
+
+ probs = _compute_softmax(total_scores)
+
+ nbest_json = []
+ for (i, entry) in enumerate(nbest):
+ output = collections.OrderedDict()
+ output["text"] = entry.text
+ output["probability"] = probs[i]
+ output["start_logit"] = entry.start_logit
+ output["end_logit"] = entry.end_logit
+ nbest_json.append(output)
+
+ assert len(nbest_json) >= 1
+
+ if not FLAGS.version_2_with_negative:
+ all_predictions[example.qas_id] = nbest_json[0]["text"]
+ else:
+ # predict "" iff the null score - the score of best non-null > threshold
+ score_diff = score_null - best_non_null_entry.start_logit - (
+ best_non_null_entry.end_logit)
+ scores_diff_json[example.qas_id] = score_diff
+ if score_diff > FLAGS.null_score_diff_threshold:
+ all_predictions[example.qas_id] = ""
+ else:
+ all_predictions[example.qas_id] = best_non_null_entry.text
+
+ all_nbest_json[example.qas_id] = nbest_json
+
+ with tf.gfile.GFile(output_prediction_file, "w") as writer:
+ writer.write(json.dumps(all_predictions, indent=4) + "\n")
+
+ with tf.gfile.GFile(output_nbest_file, "w") as writer:
+ writer.write(json.dumps(all_nbest_json, indent=4) + "\n")
+
+ if FLAGS.version_2_with_negative:
+ with tf.gfile.GFile(output_null_log_odds_file, "w") as writer:
+ writer.write(json.dumps(scores_diff_json, indent=4) + "\n")
+
+
+def get_final_text(pred_text, orig_text, do_lower_case):
+ """Project the tokenized prediction back to the original text."""
+
+ # When we created the data, we kept track of the alignment between original
+ # (whitespace tokenized) tokens and our WordPiece tokenized tokens. So
+ # now `orig_text` contains the span of our original text corresponding to the
+ # span that we predicted.
+ #
+ # However, `orig_text` may contain extra characters that we don't want in
+ # our prediction.
+ #
+ # For example, let's say:
+ # pred_text = steve smith
+ # orig_text = Steve Smith's
+ #
+ # We don't want to return `orig_text` because it contains the extra "'s".
+ #
+ # We don't want to return `pred_text` because it's already been normalized
+ # (the SQuAD eval script also does punctuation stripping/lower casing but
+ # our tokenizer does additional normalization like stripping accent
+ # characters).
+ #
+ # What we really want to return is "Steve Smith".
+ #
+ # Therefore, we have to apply a semi-complicated alignment heuristic between
+ # `pred_text` and `orig_text` to get a character-to-character alignment. This
+ # can fail in certain cases in which case we just return `orig_text`.
+
+ def _strip_spaces(text):
+ ns_chars = []
+ ns_to_s_map = collections.OrderedDict()
+ for (i, c) in enumerate(text):
+ if c == " ":
+ continue
+ ns_to_s_map[len(ns_chars)] = i
+ ns_chars.append(c)
+ ns_text = "".join(ns_chars)
+ return (ns_text, ns_to_s_map)
+
+ # We first tokenize `orig_text`, strip whitespace from the result
+ # and `pred_text`, and check if they are the same length. If they are
+ # NOT the same length, the heuristic has failed. If they are the same
+ # length, we assume the characters are one-to-one aligned.
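Using the "steve smith" example from the comment above, a simplified standalone sketch of the alignment idea (the tokenized text shown is assumed output of a lower-casing basic tokenizer, not taken from the real tokenizer):

import collections

def strip_spaces(text):
    # Same bookkeeping as _strip_spaces() above: drop spaces, remember origins.
    ns_chars, ns_to_s_map = [], collections.OrderedDict()
    for i, c in enumerate(text):
        if c == " ":
            continue
        ns_to_s_map[len(ns_chars)] = i
        ns_chars.append(c)
    return "".join(ns_chars), ns_to_s_map

pred_text = "steve smith"      # normalized prediction recovered from WordPieces
orig_text = "Steve Smith's"    # original document span
tok_text = "steve smith ' s"   # assumed basic-tokenizer output for orig_text

start = tok_text.index(pred_text)   # 0
end = start + len(pred_text) - 1    # 10
orig_ns, orig_map = strip_spaces(orig_text)  # "SteveSmith's"
tok_ns, tok_map = strip_spaces(tok_text)     # "stevesmith's" -- same length, so the
                                             # characters can be aligned one-to-one
tok_s_to_ns = {s: ns for ns, s in tok_map.items()}
orig_start = orig_map[tok_s_to_ns[start]]
orig_end = orig_map[tok_s_to_ns[end]]
print(orig_text[orig_start:orig_end + 1])  # Steve Smith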
+ tokenizer = tokenization.BasicTokenizer(do_lower_case=do_lower_case) + + tok_text = " ".join(tokenizer.tokenize(orig_text)) + + start_position = tok_text.find(pred_text) + if start_position == -1: + if FLAGS.verbose_logging: + tf.logging.info( + "Unable to find text: '%s' in '%s'" % (pred_text, orig_text)) + return orig_text + end_position = start_position + len(pred_text) - 1 + + (orig_ns_text, orig_ns_to_s_map) = _strip_spaces(orig_text) + (tok_ns_text, tok_ns_to_s_map) = _strip_spaces(tok_text) + + if len(orig_ns_text) != len(tok_ns_text): + if FLAGS.verbose_logging: + tf.logging.info("Length not equal after stripping spaces: '%s' vs '%s'", + orig_ns_text, tok_ns_text) + return orig_text + + # We then project the characters in `pred_text` back to `orig_text` using + # the character-to-character alignment. + tok_s_to_ns_map = {} + for (i, tok_index) in six.iteritems(tok_ns_to_s_map): + tok_s_to_ns_map[tok_index] = i + + orig_start_position = None + if start_position in tok_s_to_ns_map: + ns_start_position = tok_s_to_ns_map[start_position] + if ns_start_position in orig_ns_to_s_map: + orig_start_position = orig_ns_to_s_map[ns_start_position] + + if orig_start_position is None: + if FLAGS.verbose_logging: + tf.logging.info("Couldn't map start position") + return orig_text + + orig_end_position = None + if end_position in tok_s_to_ns_map: + ns_end_position = tok_s_to_ns_map[end_position] + if ns_end_position in orig_ns_to_s_map: + orig_end_position = orig_ns_to_s_map[ns_end_position] + + if orig_end_position is None: + if FLAGS.verbose_logging: + tf.logging.info("Couldn't map end position") + return orig_text + + output_text = orig_text[orig_start_position:(orig_end_position + 1)] + return output_text + + +def _get_best_indexes(logits, n_best_size): + """Get the n-best logits from a list.""" + index_and_score = sorted(enumerate(logits), key=lambda x: x[1], reverse=True) + + best_indexes = [] + for i in range(len(index_and_score)): + if i >= n_best_size: + break + best_indexes.append(index_and_score[i][0]) + return best_indexes + + +def _compute_softmax(scores): + """Compute softmax probability over raw logits.""" + if not scores: + return [] + + max_score = None + for score in scores: + if max_score is None or score > max_score: + max_score = score + + exp_scores = [] + total_sum = 0.0 + for score in scores: + x = math.exp(score - max_score) + exp_scores.append(x) + total_sum += x + + probs = [] + for score in exp_scores: + probs.append(score / total_sum) + return probs + + +class FeatureWriter(object): + """Writes InputFeature to TF example file.""" + + def __init__(self, filename, is_training): + self.filename = filename + self.is_training = is_training + self.num_features = 0 + self._writer = tf.python_io.TFRecordWriter(filename) + + def process_feature(self, feature): + """Write a InputFeature to the TFRecordWriter as a tf.train.Example.""" + self.num_features += 1 + + def create_int_feature(values): + feature = tf.train.Feature( + int64_list=tf.train.Int64List(value=list(values))) + return feature + + features = collections.OrderedDict() + features["unique_ids"] = create_int_feature([feature.unique_id]) + features["input_ids"] = create_int_feature(feature.input_ids) + features["input_mask"] = create_int_feature(feature.input_mask) + features["segment_ids"] = create_int_feature(feature.segment_ids) + + if self.is_training: + features["start_positions"] = create_int_feature([feature.start_position]) + features["end_positions"] = create_int_feature([feature.end_position]) + impossible = 
0 + if feature.is_impossible: + impossible = 1 + features["is_impossible"] = create_int_feature([impossible]) + + tf_example = tf.train.Example(features=tf.train.Features(feature=features)) + self._writer.write(tf_example.SerializeToString()) + + def close(self): + self._writer.close() + + +def validate_flags_or_throw(bert_config): + """Validate the input FLAGS or throw an exception.""" + tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case, + FLAGS.init_checkpoint) + + if not FLAGS.do_train and not FLAGS.do_predict: + raise ValueError("At least one of `do_train` or `do_predict` must be True.") + + if FLAGS.do_train: + if not FLAGS.train_file: + raise ValueError( + "If `do_train` is True, then `train_file` must be specified.") + if FLAGS.do_predict: + if not FLAGS.predict_file: + raise ValueError( + "If `do_predict` is True, then `predict_file` must be specified.") + + if FLAGS.max_seq_length > bert_config.max_position_embeddings: + raise ValueError( + "Cannot use sequence length %d because the BERT model " + "was only trained up to sequence length %d" % + (FLAGS.max_seq_length, bert_config.max_position_embeddings)) + + if FLAGS.max_seq_length <= FLAGS.max_query_length + 3: + raise ValueError( + "The max_seq_length (%d) must be greater than max_query_length " + "(%d) + 3" % (FLAGS.max_seq_length, FLAGS.max_query_length)) + + +def main(_): + tf.logging.set_verbosity(tf.logging.INFO) + + bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file) + + validate_flags_or_throw(bert_config) + + tf.gfile.MakeDirs(FLAGS.output_dir) + + tokenizer = tokenization.FullTokenizer( + vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case) + + tpu_cluster_resolver = None + if FLAGS.use_tpu and FLAGS.tpu_name: + tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver( + FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project) + + is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2 + run_config = tf.contrib.tpu.RunConfig( + cluster=tpu_cluster_resolver, + master=FLAGS.master, + model_dir=FLAGS.output_dir, + save_checkpoints_steps=FLAGS.save_checkpoints_steps, + tpu_config=tf.contrib.tpu.TPUConfig( + iterations_per_loop=FLAGS.iterations_per_loop, + num_shards=FLAGS.num_tpu_cores, + per_host_input_for_training=is_per_host)) + + train_examples = None + num_train_steps = None + num_warmup_steps = None + if FLAGS.do_train: + train_examples = read_squad_examples( + input_file=FLAGS.train_file, is_training=True) + num_train_steps = int( + len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs) + num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion) + + # Pre-shuffle the input to avoid having to make a very large shuffle + # buffer in in the `input_fn`. + rng = random.Random(12345) + rng.shuffle(train_examples) + + model_fn = model_fn_builder( + bert_config=bert_config, + init_checkpoint=FLAGS.init_checkpoint, + learning_rate=FLAGS.learning_rate, + num_train_steps=num_train_steps, + num_warmup_steps=num_warmup_steps, + use_tpu=FLAGS.use_tpu, + use_one_hot_embeddings=FLAGS.use_tpu) + + # If TPU is not available, this will fall back to normal Estimator on CPU + # or GPU. + estimator = tf.contrib.tpu.TPUEstimator( + use_tpu=FLAGS.use_tpu, + model_fn=model_fn, + config=run_config, + train_batch_size=FLAGS.train_batch_size, + predict_batch_size=FLAGS.predict_batch_size) + + if FLAGS.do_train: + # We write to a temporary file to avoid storing very large constant tensors + # in memory. 
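For reference, a minimal sketch of the `tf.train.Example` that `FeatureWriter.process_feature` above serializes into the temporary TFRecord file. The token ids and the tiny sequence length of 8 are invented for illustration (the flag default in this file is 384), and the output path is hypothetical; TensorFlow 1.x is assumed, as in the rest of the file.

import collections
import tensorflow as tf

def create_int_feature(values):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))

features = collections.OrderedDict()
features["unique_ids"] = create_int_feature([1000000000])
features["input_ids"] = create_int_feature([101, 7592, 2088, 102, 3231, 102, 0, 0])
features["input_mask"] = create_int_feature([1, 1, 1, 1, 1, 1, 0, 0])
features["segment_ids"] = create_int_feature([0, 0, 0, 0, 1, 1, 0, 0])

tf_example = tf.train.Example(features=tf.train.Features(feature=features))
writer = tf.python_io.TFRecordWriter("/tmp/eval_example.tf_record")
writer.write(tf_example.SerializeToString())  # one serialized record per feature
writer.close()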
+ train_writer = FeatureWriter( + filename=os.path.join(FLAGS.output_dir, "train.tf_record"), + is_training=True) + convert_examples_to_features( + examples=train_examples, + tokenizer=tokenizer, + max_seq_length=FLAGS.max_seq_length, + doc_stride=FLAGS.doc_stride, + max_query_length=FLAGS.max_query_length, + is_training=True, + output_fn=train_writer.process_feature) + train_writer.close() + + tf.logging.info("***** Running training *****") + tf.logging.info(" Num orig examples = %d", len(train_examples)) + tf.logging.info(" Num split examples = %d", train_writer.num_features) + tf.logging.info(" Batch size = %d", FLAGS.train_batch_size) + tf.logging.info(" Num steps = %d", num_train_steps) + del train_examples + + train_input_fn = input_fn_builder( + input_file=train_writer.filename, + seq_length=FLAGS.max_seq_length, + is_training=True, + drop_remainder=True) + estimator.train(input_fn=train_input_fn, max_steps=num_train_steps) + + if FLAGS.do_predict: + eval_examples = read_squad_examples( + input_file=FLAGS.predict_file, is_training=False) + + eval_writer = FeatureWriter( + filename=os.path.join(FLAGS.output_dir, "eval.tf_record"), + is_training=False) + eval_features = [] + + def append_feature(feature): + eval_features.append(feature) + eval_writer.process_feature(feature) + + convert_examples_to_features( + examples=eval_examples, + tokenizer=tokenizer, + max_seq_length=FLAGS.max_seq_length, + doc_stride=FLAGS.doc_stride, + max_query_length=FLAGS.max_query_length, + is_training=False, + output_fn=append_feature) + eval_writer.close() + + tf.logging.info("***** Running predictions *****") + tf.logging.info(" Num orig examples = %d", len(eval_examples)) + tf.logging.info(" Num split examples = %d", len(eval_features)) + tf.logging.info(" Batch size = %d", FLAGS.predict_batch_size) + + all_results = [] + + predict_input_fn = input_fn_builder( + input_file=eval_writer.filename, + seq_length=FLAGS.max_seq_length, + is_training=False, + drop_remainder=False) + + # If running eval on the TPU, you will need to specify the number of + # steps. 
+ all_results = [] + for result in estimator.predict( + predict_input_fn, yield_single_examples=True): + if len(all_results) % 1000 == 0: + tf.logging.info("Processing example: %d" % (len(all_results))) + unique_id = int(result["unique_ids"]) + start_logits = [float(x) for x in result["start_logits"].flat] + end_logits = [float(x) for x in result["end_logits"].flat] + all_results.append( + RawResult( + unique_id=unique_id, + start_logits=start_logits, + end_logits=end_logits)) + + output_prediction_file = os.path.join(FLAGS.output_dir, "predictions.json") + output_nbest_file = os.path.join(FLAGS.output_dir, "nbest_predictions.json") + output_null_log_odds_file = os.path.join(FLAGS.output_dir, "null_odds.json") + + write_predictions(eval_examples, eval_features, all_results, + FLAGS.n_best_size, FLAGS.max_answer_length, + FLAGS.do_lower_case, output_prediction_file, + output_nbest_file, output_null_log_odds_file) + + +if __name__ == "__main__": + flags.mark_flag_as_required("vocab_file") + flags.mark_flag_as_required("bert_config_file") + flags.mark_flag_as_required("output_dir") + tf.app.run() diff --git a/server.py b/server.py new file mode 100644 index 0000000..dd276f5 --- /dev/null +++ b/server.py @@ -0,0 +1,417 @@ +import os +import shutil + +import requests +import datetime +import time +import hashlib +import sqlite3 +import pandas +import threading +import logging as log + +server_url = "http://39.100.94.111:8083" +openid = "gpu-server-test1" +password = "1e327b070ab43fd071768a4d474f016adbbf3ea475577fe66a505d9e33b24f2f" +token = None +# 客户端代码 +client_code = "dc9fbb4f4f0b84fa903058991af60e73556494af8a02ef69fb6a93217729f04b" +# 护照认证码 +idcode = None +# 时间戳 +timestamp = "" +# 单次最大处理句数 +max_stn_num = 20000 +# 当前处理的bpt的序号 +bpt_id = 0 +# STNS +stn_list = [] +# 输入数据存储表 +predict_table = "predict_data" +# 模型处理结果输出文件夹 +result_out_dir = "./tmp/eppredict" +# 初始化标志位 +base_init = False + +log.basicConfig(filename=None, format="%(asctime)s %(levelname)s [%(funcName)s] : %(message)s", level=log.INFO) + + +def get_timestamp(): + return str(int(time.mktime(datetime.datetime.now().timetuple())) * 1000) + + +base_headers = {"timestamp": get_timestamp(), "X-Requested-With": ""} +token_headers = {"timestamp": get_timestamp(), "X-Requested-With": "", "signed": "", "openid": openid} + + +# url对象 +def url_parser(url): + return server_url + "/" + url + + +# 计算随机特征值 +def calculate_random_code(): + return hashlib.sha1("RandomCode [{0}][{1}][{2}]".format(openid, get_timestamp(), client_code).encode("utf-8")) \ + .hexdigest() + + +# 计算客户端签名 +def calculate_signed(): + return hashlib.sha1("SIGN [{0}][{1}][{2}]".format(openid, calculate_random_code(), token).encode("utf-8")) \ + .hexdigest() + + +# 检查用户是否存在 +def user_checker(): + log.info("Check User Existence: openid" + str(openid)) + checker_param = {"openid": openid} + base_headers["timestamp"] = get_timestamp() + res = requests.get(url=url_parser("user"), headers=base_headers, params=checker_param) + if res.status_code == 404: + log.warning("User Not Exist: openid" + str(openid)) + return False + else: + log.info("User Exist: openid " + str(openid)) + return True + + +# 注册用户 +def user_register(): + if not user_checker(): + log.info("Try Creating New User: openid " + str(openid)) + register_json = {"openid": openid, "password": password} + register_param = {"clientCode": client_code} + base_headers["timestamp"] = get_timestamp() + res = requests.post(url=url_parser("user/cs"), headers=base_headers, json=register_json, params=register_param) + respond_json = 
res.json() + if res.status_code == 201 and respond_json["openid"] == openid: + log.info("User Creation Success: openid " + str(openid)) + return False + else: + log.error("User Creation Failed: openid " + str(openid)) + return True + + +# 获得token +def get_token(): + if user_checker(): + log.info("Try Getting New Token") + login_json = {"openid": openid, "password": password, "clientCode": client_code} + res = requests.post(url=url_parser("user/login"), headers=base_headers, json=login_json) + respond_json = res.json() + if res.status_code == 200 and respond_json["info"] == "Authentication Success": + global token + token = respond_json["data"]["token"] + log.info("Succeed In Getting New Token" + str(token)) + else: + if base_init is True: + user_register() + log.error("Fail To Get New Token") + + +# 获得子服务器护照 +def get_csp(): + global idcode + if token is not None: + log.info("Try Getting New CSP") + # 计算客户端签名 + token_headers["signed"] = calculate_signed() + token_headers["timestamp"] = get_timestamp() + res = requests.post(url=url_parser("cs"), headers=token_headers) + respond_json = res.json() + log.debug(respond_json) + # 正常返回 + if res.status_code == 200: + # 无权限检查 + try: + idcode = respond_json["identityCode"] + log.info("Succeed In Getting CSP: idcode " + str(idcode)) + except KeyError: + if respond_json["status"] == 401: + log.warning("Token OUT OF DATE: token " + str(token)) + get_token() + return + + # 无权限返回 + elif res.status_code == 401: + # 重新获取token + log.warning("Token Maybe OUT OF DATE: token " + str(token)) + log.info("Try to Get New Token") + get_token() + else: + log.error("Failed to get New CSP") + else: + get_token() + + +# 更新签证 +def update_csp(): + if idcode is not None: + token_headers["signed"] = calculate_signed() + token_headers["timestamp"] = get_timestamp() + res = requests.put(url=url_parser("cs"), headers=token_headers, params={"idcode": idcode}) + respond_json = res.json() + log.debug(respond_json) + # 成功返回 + if res.status_code == 200 and respond_json["expired"] is False: + log.info("Succeed IN Updating CSP: idcode " + str(idcode)) + log.info("CSP Last Update Time: " + str(respond_json["lastUpdateTime"])) + elif res.status_code == 401: + # 尝试获得新的token + log.warning("Unauthorized Status Code: Try to Get New Token") + get_token() + else: + # 重新获得护照 + log.warning("CSP Maybe OUT OF DATE: idcode " + str(idcode)) + get_csp() + + +# 放弃批处理任务 +def giving_up_bpt(): + global bpt_id + global stn_list + try_count = 3 + while try_count < 3: + try_count += 1 + # 标记任务执行失败 + res = requests.put(url=url_parser("cs/bpt"), + headers=token_headers, + params={"idcode": idcode, "bptId": bpt_id, "status": False}, + json=[]) + + if res.status_code == 201: + log.info("Marking Task Failed Successful: bertId ", bpt_id) + return True + elif res.status_code == 401: + # 尝试获得新的token + log.warning("Unauthorized Status Code: Try to Get New Token") + get_token() + else: + if try_count >= 3: + log.error("Marking Task Failed Eventually Failed: bertId ", bpt_id) + log.warning("Connection Maybe Unstable") + return False + log.warning("Failed and Try: count " + str(try_count)) + + # 清空计算数据 + bpt_id = None + stn_list = [] + + +# 从主服务器获得批处理任务 +def get_bpt_from_server(): + global max_stn_num + global idcode + if idcode is not None: + log.info("Try Getting BPT From Server...") + token_headers["signed"] = calculate_signed() + token_headers["timestamp"] = get_timestamp() + res = requests.get(url=url_parser("cs/bpt"), + headers=token_headers, + params={"idcode": idcode, "maxStnNum": int(max_stn_num)}) + 
respond_json = res.json() + print(res.json()) + if res.status_code == 200: + global bpt_id + try: + bpt_id = respond_json["id"] + except KeyError: + if respond_json["status"] == 401: + get_token() + return + + # If there is no batch-processing task at the moment + if bpt_id is None: + log.info("No BPT Task For Now") + return + + stns = respond_json["stns"] + if len(stns) == 0: + + log.info("STNS IS EMPTY, Giving UP") + giving_up_bpt() + return + + log.info("Get BPT Task: bptId " + str(bpt_id)) + global stn_list + stn_list = stns + conn = sqlite3.connect(r".\bptdata.db") + # Process the data + cursor = conn.cursor() + cursor.execute("DELETE FROM {0}".format(predict_table)) + + log.info("Processing Bert Predict Data...") + for stn in stns: + sql = "INSERT INTO {0} (id, text) values (?, ?)".format(predict_table) + cursor.execute(sql, [stn["stnId"], stn["text"]]) + conn.commit() + conn.close() + log.info("Finished in Processing Bert Predict Data") + + result = execute_bert_predict() + + if result is True: + if processing_bert_result() is True: + log.info("BPT Execution Success: bptId " + str(bpt_id)) + else: + log.info("BPT Execution Eventually Failed: bptId " + str(bpt_id)) + else: + log.error("Bert Model Execution Failed") + + log.info("Try Giving Up BPT Task: bptId " + str(bpt_id)) + giving_up_bpt() + + log.info("Get Status Code: " + str(res.status_code)) + + # Clear the computation data + bpt_id = None + stn_list = [] + + elif res.status_code == 400: + if respond_json["data"]["exception"] == "org.codedream.epaper.exception.badrequest.AuthExpiredException": + print("Auth Expired Exception: Try to Get New CSP") + get_csp() + return + else: + print("Unknown Exception") + + elif res.status_code == 401: + # Try to get a new token + log.warning("Unauthorized Status Code: Try to Get New Token") + get_token() + elif res.status_code == 500: + log.warning("Remote Server Error: Inner Server Error") + print(res.json()) + else: + # Try to re-acquire the passport (CSP) + get_csp() + + +# Initialize the database environment +def sqlite_create_table(): + conn = sqlite3.connect(r".\bptdata.db") + cursor = conn.cursor() + create_tb_cmd = "CREATE TABLE IF NOT EXISTS {0}" \ + "(id INT PRIMARY KEY," \ + "text TEXT)".format(predict_table) + cursor.execute(create_tb_cmd) + cursor.execute("DELETE FROM {0}".format(predict_table)) + conn.commit() + conn.close() + + +# Run the BERT neural network model +def execute_bert_predict(): + if os.path.exists(result_out_dir): + shutil.rmtree(result_out_dir) + log.info("BERT Model Executing...") + os.system("python run_classifier.py " + "--task_name=eppdt " + "--do_predict=true " + "--data_dir=./tmp " + "--vocab_file=./chinese_wwm_ext_L-12_H-768_A-12/vocab.txt " + "--bert_config_file=./chinese_wwm_ext_L-12_H-768_A-12/bert_config.json " + "--init_checkpoint=./tmp/epout/model.ckpt-14062 " + "--max_seq_length=64 " + "--output_dir=./tmp/eppredict/ > bert_out.log 2>&1") + result_list = os.listdir(result_out_dir) + log.info("BERT Model Execution Finished.") + if "test_results.tsv" not in result_list: + return False + else: + return True + + +# Process the model's prediction results +def processing_bert_result(): + result = pandas.read_csv(result_out_dir + '/test_results.tsv', sep='\t', header=None) + token_headers["timestamp"] = get_timestamp() + token_headers["signed"] = calculate_signed() + bpt_result_json = [] + idx = 0 + + for i, row in result.iterrows(): + bpt_result_json.append({"stnid": stn_list[idx]["stnId"], "tagPossible": [row[0], row[1], row[2]]}) + idx += 1 + + log.debug("Bert Result Json") + log.debug(bpt_result_json) + log.info("Processing BERT Model Result Successful") + + # Try up to 3 times + try_count = 0 + while try_count < 3: + try_count += 1 + log.info("Uploading 
BERT Model Result...") + res = requests.put(url=url_parser("cs/bpt"), + headers=token_headers, + params={"idcode": idcode, "bptId": bpt_id, "status": True}, + json=bpt_result_json) + if res.status_code == 201: + log.info("Uploading Successful: bertId " + str(bpt_id)) + return True + elif res.status_code == 401: + # 尝试获得新的token + log.warning("Unauthorized Status Code: Try to Get New Token") + get_token() + else: + if try_count >= 3: + log.error("Uploading Eventually Failed: bertId " + str(bpt_id)) + log.warning("Connection Maybe Unstable") + return False + log.warning("Failed and Try: count " + str(try_count)) + + +# 签证更新多线程定时器 +def update_csp_timer(): + log.info("UPDATE CSP TIMER STARTED") + try: + update_csp() + except: + log.error("Exception Thrown, Restarting Timer...") + finally: + t = threading.Timer(60, update_csp_timer) + t.start() + + +# 批处理任务多线程定时器 +def get_bpt_timer(): + log.info("GET BPT TIMER STARTED") + try: + get_bpt_from_server() + except: + log.error("Exception Thrown, Restarting Timer...") + finally: + t = threading.Timer(15, get_bpt_timer) + t.start() + + +# 初始化工作 +def init(): + global base_init + sqlite_create_table() + user_register() + get_token() + get_csp() + base_init = True + + +# 初始化定时器 +def init_timer(): + update_csp_timer() + get_bpt_timer() + + +if __name__ == "__main__": + try_time = 0 + while try_time < 3: + try: + init() + try_time = 3 + except: + try_time += 1 + time.sleep(5) + + init_timer() + while True: + time.sleep(5) diff --git a/tmp/epout/checkpoint b/tmp/epout/checkpoint new file mode 100644 index 0000000..afa776b --- /dev/null +++ b/tmp/epout/checkpoint @@ -0,0 +1,6 @@ +model_checkpoint_path: "model.ckpt-14062" +all_model_checkpoint_paths: "model.ckpt-11000" +all_model_checkpoint_paths: "model.ckpt-12000" +all_model_checkpoint_paths: "model.ckpt-13000" +all_model_checkpoint_paths: "model.ckpt-14000" +all_model_checkpoint_paths: "model.ckpt-14062" diff --git a/tmp/epout/eval.tf_record b/tmp/epout/eval.tf_record new file mode 100644 index 0000000..32d1791 Binary files /dev/null and b/tmp/epout/eval.tf_record differ diff --git a/tmp/epout/eval/events.out.tfevents.1586543049.iZ8vbescrakld4m4drzcktZ b/tmp/epout/eval/events.out.tfevents.1586543049.iZ8vbescrakld4m4drzcktZ new file mode 100644 index 0000000..aed70db Binary files /dev/null and b/tmp/epout/eval/events.out.tfevents.1586543049.iZ8vbescrakld4m4drzcktZ differ diff --git a/tmp/epout/eval_results.txt b/tmp/epout/eval_results.txt new file mode 100644 index 0000000..87aef38 --- /dev/null +++ b/tmp/epout/eval_results.txt @@ -0,0 +1,4 @@ +eval_accuracy = 0.98253334 +eval_loss = 0.06590833 +global_step = 14062 +loss = 0.06590833 diff --git a/tmp/epout/events.out.tfevents.1586536204.iZ8vbescrakld4m4drzcktZ b/tmp/epout/events.out.tfevents.1586536204.iZ8vbescrakld4m4drzcktZ new file mode 100644 index 0000000..9f35bd4 Binary files /dev/null and b/tmp/epout/events.out.tfevents.1586536204.iZ8vbescrakld4m4drzcktZ differ diff --git a/tmp/epout/graph.pbtxt b/tmp/epout/graph.pbtxt new file mode 100644 index 0000000..8d0c735 --- /dev/null +++ b/tmp/epout/graph.pbtxt @@ -0,0 +1,592992 @@ +node { + name: "global_step/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@global_step" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 0 + } + } + } +} +node { + name: "global_step" + op: 
"VarHandleOp" + attr { + key: "_class" + value { + list { + s: "loc:@global_step" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + } + } + } + attr { + key: "shared_name" + value { + s: "global_step" + } + } +} +node { + name: "global_step/IsInitialized/VarIsInitializedOp" + op: "VarIsInitializedOp" + input: "global_step" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_step/Assign" + op: "AssignVariableOp" + input: "global_step" + input: "global_step/Initializer/zeros" + attr { + key: "_class" + value { + list { + s: "loc:@global_step" + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } +} +node { + name: "global_step/Read/ReadVariableOp" + op: "ReadVariableOp" + input: "global_step" + attr { + key: "_class" + value { + list { + s: "loc:@global_step" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } +} +node { + name: "global_step/VarIsInitializedOp" + op: "VarIsInitializedOp" + input: "global_step" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_step/cond/Switch" + op: "Switch" + input: "global_step/VarIsInitializedOp" + input: "global_step/VarIsInitializedOp" + attr { + key: "T" + value { + type: DT_BOOL + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + shape { + } + } + } + } +} +node { + name: "global_step/cond/switch_t" + op: "Identity" + input: "global_step/cond/Switch:1" + attr { + key: "T" + value { + type: DT_BOOL + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_step/cond/switch_f" + op: "Identity" + input: "global_step/cond/Switch" + attr { + key: "T" + value { + type: DT_BOOL + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_step/cond/pred_id" + op: "Identity" + input: "global_step/VarIsInitializedOp" + attr { + key: "T" + value { + type: DT_BOOL + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_step/cond/Read/ReadVariableOp" + op: "ReadVariableOp" + input: "global_step/cond/Read/ReadVariableOp/Switch:1" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } +} +node { + name: "global_step/cond/Read/ReadVariableOp/Switch" + op: "Switch" + input: "global_step" + input: "global_step/cond/pred_id" + attr { + key: "T" + value { + type: DT_RESOURCE + } + } + attr { + key: "_class" + value { + list { + s: "loc:@global_step" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + shape { + } + } + } + } +} +node { + name: "global_step/cond/Identity" + op: "Identity" + input: "global_step/cond/Read/ReadVariableOp" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_step/cond/Switch_1" + op: "Switch" + input: "global_step/Initializer/zeros" + input: "global_step/cond/pred_id" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@global_step" + } + } + } + attr { + key: "_output_shapes" + 
value { + list { + shape { + } + shape { + } + } + } + } +} +node { + name: "global_step/cond/Merge" + op: "Merge" + input: "global_step/cond/Switch_1" + input: "global_step/cond/Identity" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + shape { + } + } + } + } +} +node { + name: "global_step/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 0 + } + } + } +} +node { + name: "global_step/add" + op: "Add" + input: "global_step/cond/Merge" + input: "global_step/add/y" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Const" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./tmp/epout/train.tf_record" + } + } + } +} +node { + name: "flat_filenames/shape" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -1 + } + } + } +} +node { + name: "flat_filenames" + op: "Reshape" + input: "Const" + input: "flat_filenames/shape" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "TensorSliceDataset" + op: "TensorSliceDataset" + input: "flat_filenames" + device: "/device:CPU:0" + attr { + key: "Toutput_types" + value { + list { + type: DT_STRING + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "FlatMapDataset" + op: "FlatMapDataset" + input: "TensorSliceDataset" + device: "/device:CPU:0" + attr { + key: "Targuments" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "f" + value { + func { + name: "__inference_Dataset_flat_map_read_one_file_31" + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_STRING + } + } + } +} +node { + name: "count" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: -1 + } + } + } +} +node { + name: "RepeatDataset" + op: "RepeatDataset" + input: "FlatMapDataset" + input: "count" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_STRING + } + } + } +} +node { + 
name: "buffer_size" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 100 + } + } + } +} +node { + name: "seed" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 0 + } + } + } +} +node { + name: "seed2" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 0 + } + } + } +} +node { + name: "ShuffleDataset" + op: "ShuffleDataset" + input: "RepeatDataset" + input: "buffer_size" + input: "seed" + input: "seed2" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_STRING + } + } + } + attr { + key: "reshuffle_each_iteration" + value { + b: true + } + } +} +node { + name: "batch_size" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 32 + } + } + } +} +node { + name: "num_parallel_calls" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 32 + } + } + } +} +node { + name: "drop_remainder" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_BOOL + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_BOOL + tensor_shape { + } + bool_val: true + } + } + } +} +node { + name: "ExperimentalMapAndBatchDataset" + op: "ExperimentalMapAndBatchDataset" + input: "ShuffleDataset" + input: "batch_size" + input: "num_parallel_calls" + input: "drop_remainder" + device: "/device:CPU:0" + attr { + key: "Targuments" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "f" + value { + func { + name: "__inference_tf_data_experimental_map_and_batch__61" + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + } + shape { + dim { + size: 32 + } + dim { + size: 128 + } + } + shape { + dim { + size: 32 + } + } + shape { + dim { + size: 32 + } + } + shape { + dim { + size: 32 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_INT32 + type: DT_INT32 + type: DT_INT32 + type: DT_INT32 + type: DT_INT32 + } + } + } + attr { + key: "preserve_cardinality" + value { + b: true + } + } +} +node { + name: "optimizations" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: 
"dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 3 + } + } + string_val: "map_and_batch_fusion" + string_val: "noop_elimination" + string_val: "shuffle_and_repeat_fusion" + } + } + } +} +node { + name: "OptimizeDataset" + op: "OptimizeDataset" + input: "ExperimentalMapAndBatchDataset" + input: "optimizations" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "optimization_configs" + value { + list { + s: "map_vectorization:use_choose_fastest:false" + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + } + shape { + dim { + size: 32 + } + dim { + size: 128 + } + } + shape { + dim { + size: 32 + } + } + shape { + dim { + size: 32 + } + } + shape { + dim { + size: 32 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_INT32 + type: DT_INT32 + type: DT_INT32 + type: DT_INT32 + type: DT_INT32 + } + } + } +} +node { + name: "ModelDataset" + op: "ModelDataset" + input: "OptimizeDataset" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "cpu_budget" + value { + i: 0 + } + } + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + } + shape { + dim { + size: 32 + } + dim { + size: 128 + } + } + shape { + dim { + size: 32 + } + } + shape { + dim { + size: 32 + } + } + shape { + dim { + size: 32 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_INT32 + type: DT_INT32 + type: DT_INT32 + type: DT_INT32 + type: DT_INT32 + } + } + } +} +node { + name: "IteratorV2" + op: "IteratorV2" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + } + shape { + dim { + size: 32 + } + dim { + size: 128 + } + } + shape { + dim { + size: 32 + } + } + shape { + dim { + size: 32 + } + } + shape { + dim { + size: 32 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_INT32 + type: DT_INT32 + type: DT_INT32 + type: DT_INT32 + type: DT_INT32 + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "MakeIterator" + op: "MakeIterator" + input: "ModelDataset" + input: "IteratorV2" + attr { + key: "_class" + value { + list { + s: "loc:@IteratorV2" + } + } + } +} +node { + name: "IteratorToStringHandle" + op: "IteratorToStringHandle" + input: "IteratorV2" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "IteratorGetNext" + op: "IteratorGetNext" + input: "IteratorV2" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + } + shape { + dim { + size: 32 + } + dim { + size: 128 + } + } + shape { + dim { + size: 32 + } + } + shape { + dim { + size: 32 + } + } + shape { + dim { + size: 32 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + } + shape { + dim { + size: 32 + } + dim { + size: 128 + } + } + shape { + dim { + size: 32 + } + } + shape { + dim { + size: 32 + } + } + shape { + dim { + size: 32 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: 
"output_types" + value { + list { + type: DT_INT32 + type: DT_INT32 + type: DT_INT32 + type: DT_INT32 + type: DT_INT32 + } + } + } +} +node { + name: "Cast" + op: "Cast" + input: "IteratorGetNext:2" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + } + } + } + } +} +node { + name: "bert/embeddings/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -1 + } + } + } +} +node { + name: "bert/embeddings/ExpandDims" + op: "ExpandDims" + input: "IteratorGetNext" + input: "bert/embeddings/ExpandDims/dim" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tdim" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/embeddings/word_embeddings/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\210R\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/embeddings/word_embeddings/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/word_embeddings/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/embeddings/word_embeddings/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/embeddings/word_embeddings/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/embeddings/word_embeddings/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/embeddings/word_embeddings/Initializer/truncated_normal/TruncatedNormal" + input: 
"bert/embeddings/word_embeddings/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/word_embeddings/Initializer/truncated_normal" + op: "Add" + input: "bert/embeddings/word_embeddings/Initializer/truncated_normal/mul" + input: "bert/embeddings/word_embeddings/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/word_embeddings" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/word_embeddings/Assign" + op: "Assign" + input: "bert/embeddings/word_embeddings" + input: "bert/embeddings/word_embeddings/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/word_embeddings/read" + op: "Identity" + input: "bert/embeddings/word_embeddings" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -1 + } + } + } +} +node { + name: "bert/embeddings/Reshape" + op: "Reshape" + input: "bert/embeddings/ExpandDims" + input: "bert/embeddings/Reshape/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } +} +node { + name: "bert/embeddings/GatherV2/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "bert/embeddings/GatherV2" + op: "GatherV2" + input: 
"bert/embeddings/word_embeddings/read" + input: "bert/embeddings/Reshape" + input: "bert/embeddings/GatherV2/axis" + attr { + key: "Taxis" + value { + type: DT_INT32 + } + } + attr { + key: "Tindices" + value { + type: DT_INT32 + } + } + attr { + key: "Tparams" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "batch_dims" + value { + i: 0 + } + } +} +node { + name: "bert/embeddings/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/embeddings/Reshape_1" + op: "Reshape" + input: "bert/embeddings/GatherV2" + input: "bert/embeddings/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\002\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal/mul" + op: "Mul" + input: 
"bert/embeddings/token_type_embeddings/Initializer/truncated_normal/TruncatedNormal" + input: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal" + op: "Add" + input: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal/mul" + input: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/token_type_embeddings" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/Assign" + op: "Assign" + input: "bert/embeddings/token_type_embeddings" + input: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/read" + op: "Identity" + input: "bert/embeddings/token_type_embeddings" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -1 + } + } + } +} +node { + name: "bert/embeddings/Reshape_2" + op: "Reshape" + input: "IteratorGetNext:4" + input: "bert/embeddings/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } +} +node { + name: "bert/embeddings/one_hot/on_value" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + 
value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/embeddings/one_hot/off_value" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/one_hot/depth" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "bert/embeddings/one_hot" + op: "OneHot" + input: "bert/embeddings/Reshape_2" + input: "bert/embeddings/one_hot/depth" + input: "bert/embeddings/one_hot/on_value" + input: "bert/embeddings/one_hot/off_value" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "TI" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 2 + } + } + } + } + } + attr { + key: "axis" + value { + i: -1 + } + } +} +node { + name: "bert/embeddings/MatMul" + op: "MatMul" + input: "bert/embeddings/one_hot" + input: "bert/embeddings/token_type_embeddings/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/embeddings/Reshape_3/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/embeddings/Reshape_3" + op: "Reshape" + input: "bert/embeddings/MatMul" + input: "bert/embeddings/Reshape_3/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/add" + op: "Add" + input: "bert/embeddings/Reshape_1" + input: "bert/embeddings/Reshape_3" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/assert_less_equal/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 128 + } + } + } +} +node { + name: "bert/embeddings/assert_less_equal/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 512 + } + } + } +} +node { + name: "bert/embeddings/assert_less_equal/LessEqual" + op: "LessEqual" + input: 
"bert/embeddings/assert_less_equal/x" + input: "bert/embeddings/assert_less_equal/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/embeddings/assert_less_equal/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "bert/embeddings/assert_less_equal/All" + op: "All" + input: "bert/embeddings/assert_less_equal/LessEqual" + input: "bert/embeddings/assert_less_equal/Const" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "bert/embeddings/assert_less_equal/Assert/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "" + } + } + } +} +node { + name: "bert/embeddings/assert_less_equal/Assert/Const_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "Condition x <= y did not hold element-wise:x (bert/embeddings/assert_less_equal/x:0) = " + } + } + } +} +node { + name: "bert/embeddings/assert_less_equal/Assert/Const_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "y (bert/embeddings/assert_less_equal/y:0) = " + } + } + } +} +node { + name: "bert/embeddings/assert_less_equal/Assert/Assert/data_0" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "" + } + } + } +} +node { + name: "bert/embeddings/assert_less_equal/Assert/Assert/data_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "Condition x <= y did not hold element-wise:x (bert/embeddings/assert_less_equal/x:0) = " + } + } + } +} +node { + name: "bert/embeddings/assert_less_equal/Assert/Assert/data_3" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "y (bert/embeddings/assert_less_equal/y:0) = " + } + } + } +} +node { + name: "bert/embeddings/assert_less_equal/Assert/Assert" + op: "Assert" + input: "bert/embeddings/assert_less_equal/All" + input: "bert/embeddings/assert_less_equal/Assert/Assert/data_0" + input: "bert/embeddings/assert_less_equal/Assert/Assert/data_1" + input: "bert/embeddings/assert_less_equal/x" + input: 
"bert/embeddings/assert_less_equal/Assert/Assert/data_3" + input: "bert/embeddings/assert_less_equal/y" + attr { + key: "T" + value { + list { + type: DT_STRING + type: DT_STRING + type: DT_INT32 + type: DT_STRING + type: DT_INT32 + } + } + } + attr { + key: "summarize" + value { + i: 3 + } + } +} +node { + name: "bert/embeddings/position_embeddings/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\002\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/embeddings/position_embeddings/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/position_embeddings/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/embeddings/position_embeddings/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/embeddings/position_embeddings/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/embeddings/position_embeddings/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/embeddings/position_embeddings/Initializer/truncated_normal/TruncatedNormal" + input: "bert/embeddings/position_embeddings/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/position_embeddings/Initializer/truncated_normal" + op: "Add" + input: "bert/embeddings/position_embeddings/Initializer/truncated_normal/mul" + input: "bert/embeddings/position_embeddings/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: 
"bert/embeddings/position_embeddings" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/position_embeddings/Assign" + op: "Assign" + input: "bert/embeddings/position_embeddings" + input: "bert/embeddings/position_embeddings/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/position_embeddings/read" + op: "Identity" + input: "bert/embeddings/position_embeddings" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/Slice/begin" + op: "Const" + input: "^bert/embeddings/assert_less_equal/Assert/Assert" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\000\000\000\000" + } + } + } +} +node { + name: "bert/embeddings/Slice/size" + op: "Const" + input: "^bert/embeddings/assert_less_equal/Assert/Assert" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\200\000\000\000\377\377\377\377" + } + } + } +} +node { + name: "bert/embeddings/Slice" + op: "Slice" + input: "bert/embeddings/position_embeddings/read" + input: "bert/embeddings/Slice/begin" + input: "bert/embeddings/Slice/size" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/Reshape_4/shape" + op: "Const" + input: "^bert/embeddings/assert_less_equal/Assert/Assert" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/embeddings/Reshape_4" + op: "Reshape" + input: "bert/embeddings/Slice" + input: "bert/embeddings/Reshape_4/shape" + attr { + key: "T" + value { + 
type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/add_1" + op: "Add" + input: "bert/embeddings/add" + input: "bert/embeddings/Reshape_4" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/embeddings/LayerNorm/beta" + input: "bert/embeddings/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/LayerNorm/beta/read" + op: "Identity" + input: "bert/embeddings/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/LayerNorm/gamma/Assign" + op: 
"Assign" + input: "bert/embeddings/LayerNorm/gamma" + input: "bert/embeddings/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/LayerNorm/gamma/read" + op: "Identity" + input: "bert/embeddings/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/moments/mean" + op: "Mean" + input: "bert/embeddings/add_1" + input: "bert/embeddings/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/embeddings/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/embeddings/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/embeddings/add_1" + input: "bert/embeddings/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/moments/variance" + op: "Mean" + input: "bert/embeddings/LayerNorm/moments/SquaredDifference" + input: "bert/embeddings/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/embeddings/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: 
"dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/embeddings/LayerNorm/moments/variance" + input: "bert/embeddings/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/embeddings/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/embeddings/LayerNorm/batchnorm/Rsqrt" + input: "bert/embeddings/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/embeddings/add_1" + input: "bert/embeddings/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/embeddings/LayerNorm/moments/mean" + input: "bert/embeddings/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/embeddings/LayerNorm/beta/read" + input: "bert/embeddings/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/embeddings/LayerNorm/batchnorm/mul_1" + input: "bert/embeddings/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/dropout/rate" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "bert/embeddings/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: 
"bert/embeddings/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/embeddings/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/embeddings/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/embeddings/dropout/random_uniform/sub" + op: "Sub" + input: "bert/embeddings/dropout/random_uniform/max" + input: "bert/embeddings/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/embeddings/dropout/random_uniform/mul" + op: "Mul" + input: "bert/embeddings/dropout/random_uniform/RandomUniform" + input: "bert/embeddings/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/dropout/random_uniform" + op: "Add" + input: "bert/embeddings/dropout/random_uniform/mul" + input: "bert/embeddings/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/dropout/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/embeddings/dropout/sub" + op: "Sub" + input: "bert/embeddings/dropout/sub/x" + input: "bert/embeddings/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/embeddings/dropout/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/embeddings/dropout/truediv" + op: "RealDiv" + input: "bert/embeddings/dropout/truediv/x" + input: "bert/embeddings/dropout/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/embeddings/dropout/GreaterEqual" + op: "GreaterEqual" + input: 
"bert/embeddings/dropout/random_uniform" + input: "bert/embeddings/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/dropout/mul" + op: "Mul" + input: "bert/embeddings/LayerNorm/batchnorm/add_1" + input: "bert/embeddings/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/dropout/Cast" + op: "Cast" + input: "bert/embeddings/dropout/GreaterEqual" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/dropout/mul_1" + op: "Mul" + input: "bert/embeddings/dropout/mul" + input: "bert/embeddings/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\001\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "bert/encoder/Reshape" + op: "Reshape" + input: "IteratorGetNext:1" + input: "bert/encoder/Reshape/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/Cast" + op: "Cast" + input: "bert/encoder/Reshape" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/ones/shape_as_tensor" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/ones/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/ones" + op: "Fill" + input: "bert/encoder/ones/shape_as_tensor" + input: "bert/encoder/ones/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/mul" + op: "Mul" + input: "bert/encoder/ones" + input: "bert/encoder/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\377\377\377\377\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/Reshape_1" + op: "Reshape" + input: "bert/embeddings/dropout/mul_1" + input: "bert/encoder/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_0/attention/self/query/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: 
"bert/encoder/layer_0/attention/self/query/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_0/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_0/attention/self/query/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_0/attention/self/query/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_0/attention/self/query/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/kernel" + input: "bert/encoder/layer_0/attention/self/query/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/kernel/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/self/query/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_0/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/bias" + input: "bert/encoder/layer_0/attention/self/query/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/bias/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/self/query/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/MatMul" + op: "MatMul" + input: "bert/encoder/Reshape_1" + input: "bert/encoder/layer_0/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_0/attention/self/query/MatMul" + input: "bert/encoder/layer_0/attention/self/query/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel/Initializer/truncated_normal/stddev" + op: "Const" + 
attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_0/attention/self/key/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_0/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_0/attention/self/key/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_0/attention/self/key/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_0/attention/self/key/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/kernel" + input: "bert/encoder/layer_0/attention/self/key/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: 
"bert/encoder/layer_0/attention/self/key/kernel/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/self/key/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/bias" + input: "bert/encoder/layer_0/attention/self/key/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/bias/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/self/key/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/MatMul" + op: "MatMul" + input: "bert/encoder/Reshape_1" + input: "bert/encoder/layer_0/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_0/attention/self/key/MatMul" + input: "bert/encoder/layer_0/attention/self/key/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel/Initializer/truncated_normal/shape" + op: "Const" + 
attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_0/attention/self/value/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_0/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_0/attention/self/value/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_0/attention/self/value/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_0/attention/self/value/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/value/kernel" + input: "bert/encoder/layer_0/attention/self/value/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/self/value/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/value/bias" + input: "bert/encoder/layer_0/attention/self/value/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/bias/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/self/value/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + 
dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/MatMul" + op: "MatMul" + input: "bert/encoder/Reshape_1" + input: "bert/encoder/layer_0/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_0/attention/self/value/MatMul" + input: "bert/encoder/layer_0/attention/self/value/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/Reshape" + op: "Reshape" + input: "bert/encoder/layer_0/attention/self/query/BiasAdd" + input: "bert/encoder/layer_0/attention/self/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/transpose" + op: "Transpose" + input: "bert/encoder/layer_0/attention/self/Reshape" + input: "bert/encoder/layer_0/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/Reshape_1" + op: "Reshape" + input: "bert/encoder/layer_0/attention/self/key/BiasAdd" + input: "bert/encoder/layer_0/attention/self/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + 
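# Each of the query/key/value branches above applies a [768, 768] dense layer
# (MatMul + BiasAdd) to the flattened [4096, 768] activations, reshapes to
# [32, 128, 12, 64], and transposes with perm [0, 2, 1, 3] so the 12 heads become
# a batch dimension. A minimal NumPy sketch of one branch; the random weights are
# stand-ins, not the checkpoint values:
#
#   import numpy as np
#
#   def project_to_heads(flat, kernel, bias, batch=32, seq=128, heads=12, size=64):
#       y = flat @ kernel + bias                      # MatMul + BiasAdd -> (4096, 768)
#       y = y.reshape(batch, seq, heads, size)        # Reshape
#       return y.transpose(0, 2, 1, 3)                # transpose, perm [0, 2, 1, 3]
#
#   flat = np.random.randn(4096, 768).astype(np.float32)
#   q = project_to_heads(flat, np.random.randn(768, 768).astype(np.float32),
#                        np.zeros(768, np.float32))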
key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/transpose_1" + op: "Transpose" + input: "bert/encoder/layer_0/attention/self/Reshape_1" + input: "bert/encoder/layer_0/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/MatMul" + op: "BatchMatMulV2" + input: "bert/encoder/layer_0/attention/self/transpose" + input: "bert/encoder/layer_0/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/Mul/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.125 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/Mul" + op: "Mul" + input: "bert/encoder/layer_0/attention/self/MatMul" + input: "bert/encoder/layer_0/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/ExpandDims" + op: "ExpandDims" + input: "bert/encoder/mul" + input: "bert/encoder/layer_0/attention/self/ExpandDims/dim" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tdim" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + 
tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/sub" + op: "Sub" + input: "bert/encoder/layer_0/attention/self/sub/x" + input: "bert/encoder/layer_0/attention/self/ExpandDims" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/mul_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: -10000.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/mul_1" + op: "Mul" + input: "bert/encoder/layer_0/attention/self/sub" + input: "bert/encoder/layer_0/attention/self/mul_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/add" + op: "Add" + input: "bert/encoder/layer_0/attention/self/Mul" + input: "bert/encoder/layer_0/attention/self/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/Softmax" + op: "Softmax" + input: "bert/encoder/layer_0/attention/self/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/dropout/rate" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/dropout/random_uniform/RandomUniform" + op: 
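# The nodes above compute masked scaled dot-product attention scores: Q.K^T
# (BatchMatMulV2 with adj_y=true) is scaled by 0.125 = 1/sqrt(64), positions
# outside the input mask receive an additive -10000, and Softmax normalizes over
# the last axis. A hedged NumPy sketch (q, k are [32, 12, 128, 64], attn_mask is
# the [32, 128, 128] mask built by bert/encoder/mul):
#
#   import numpy as np
#
#   def attention_probs(q, k, attn_mask):
#       scores = (q @ k.transpose(0, 1, 3, 2)) * 0.125          # MatMul + Mul
#       scores += (1.0 - attn_mask[:, None, :, :]) * -10000.0   # ExpandDims, sub, mul_1, add
#       scores -= scores.max(axis=-1, keepdims=True)             # stable Softmax
#       e = np.exp(scores)
#       return e / e.sum(axis=-1, keepdims=True)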
"RandomUniform" + input: "bert/encoder/layer_0/attention/self/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_0/attention/self/dropout/random_uniform/max" + input: "bert/encoder/layer_0/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_0/attention/self/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_0/attention/self/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_0/attention/self/dropout/random_uniform/mul" + input: "bert/encoder/layer_0/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/dropout/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/dropout/sub" + op: "Sub" + input: "bert/encoder/layer_0/attention/self/dropout/sub/x" + input: "bert/encoder/layer_0/attention/self/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/dropout/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/dropout/truediv" + op: "RealDiv" + input: "bert/encoder/layer_0/attention/self/dropout/truediv/x" + input: "bert/encoder/layer_0/attention/self/dropout/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/dropout/GreaterEqual" + op: "GreaterEqual" + input: "bert/encoder/layer_0/attention/self/dropout/random_uniform" + input: "bert/encoder/layer_0/attention/self/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + 
} + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_0/attention/self/Softmax" + input: "bert/encoder/layer_0/attention/self/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/dropout/Cast" + op: "Cast" + input: "bert/encoder/layer_0/attention/self/dropout/GreaterEqual" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/dropout/mul_1" + op: "Mul" + input: "bert/encoder/layer_0/attention/self/dropout/mul" + input: "bert/encoder/layer_0/attention/self/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/Reshape_2" + op: "Reshape" + input: "bert/encoder/layer_0/attention/self/value/BiasAdd" + input: "bert/encoder/layer_0/attention/self/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/transpose_2/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/transpose_2" + op: "Transpose" + input: "bert/encoder/layer_0/attention/self/Reshape_2" + input: "bert/encoder/layer_0/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/layer_0/attention/self/dropout/mul_1" + input: "bert/encoder/layer_0/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/transpose_3/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/transpose_3" + op: "Transpose" + input: "bert/encoder/layer_0/attention/self/MatMul_1" + input: "bert/encoder/layer_0/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/Reshape_3/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/Reshape_3" + op: "Reshape" + input: "bert/encoder/layer_0/attention/self/transpose_3" + input: "bert/encoder/layer_0/attention/self/Reshape_3/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + 
dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_0/attention/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_0/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_0/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_0/attention/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_0/attention/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/kernel" + input: "bert/encoder/layer_0/attention/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_0/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/bias" + input: "bert/encoder/layer_0/attention/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_0/attention/self/Reshape_3" + input: "bert/encoder/layer_0/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_0/attention/output/dense/MatMul" + input: "bert/encoder/layer_0/attention/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dropout/rate" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor 
{ + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_0/attention/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_0/attention/output/dropout/random_uniform/max" + input: "bert/encoder/layer_0/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_0/attention/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_0/attention/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_0/attention/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_0/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dropout/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dropout/sub" + op: "Sub" + input: "bert/encoder/layer_0/attention/output/dropout/sub/x" + input: "bert/encoder/layer_0/attention/output/dropout/rate" + attr { + key: "T" + value { + 
type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dropout/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dropout/truediv" + op: "RealDiv" + input: "bert/encoder/layer_0/attention/output/dropout/truediv/x" + input: "bert/encoder/layer_0/attention/output/dropout/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dropout/GreaterEqual" + op: "GreaterEqual" + input: "bert/encoder/layer_0/attention/output/dropout/random_uniform" + input: "bert/encoder/layer_0/attention/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_0/attention/output/dense/BiasAdd" + input: "bert/encoder/layer_0/attention/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dropout/Cast" + op: "Cast" + input: "bert/encoder/layer_0/attention/output/dropout/GreaterEqual" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dropout/mul_1" + op: "Mul" + input: "bert/encoder/layer_0/attention/output/dropout/mul" + input: "bert/encoder/layer_0/attention/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/add" + op: "Add" + input: "bert/encoder/layer_0/attention/output/dropout/mul_1" + input: "bert/encoder/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta" + } + } + } + attr { + 
key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + 
dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_0/attention/output/add" + input: "bert/encoder/layer_0/attention/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_0/attention/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_0/attention/output/add" + input: "bert/encoder/layer_0/attention/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_0/attention/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_0/attention/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node 
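The dropout/rate, dropout/random_uniform*, GreaterEqual, Cast, truediv and mul nodes laid out above implement TF 1.x inverted dropout with rate 0.1: uniform noise is thresholded into a keep-mask and the surviving activations are rescaled by 1 / (1 - rate) so the expected output is unchanged. A minimal NumPy sketch of the same computation (illustrative only; the function name and seed are invented and not part of this diff):

import numpy as np

def inverted_dropout(x, rate=0.1, seed=0):
    # Mirrors the dropout/* nodes: uniform noise in [0, 1), a keep-mask
    # from GreaterEqual(noise, rate), and rescaling by 1 / (1 - rate).
    rng = np.random.default_rng(seed)
    noise = rng.uniform(0.0, 1.0, size=x.shape).astype(np.float32)
    keep = (noise >= rate).astype(np.float32)      # GreaterEqual + Cast
    return x * (1.0 / (1.0 - rate)) * keep         # truediv, mul, mul_1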
{ + name: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_0/attention/output/add" + input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_0/attention/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/read" + input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: 
"bert/encoder/layer_0/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_0/intermediate/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_0/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_0/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_0/intermediate/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_0/intermediate/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/kernel" + input: "bert/encoder/layer_0/intermediate/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + 
attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_0/intermediate/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/bias/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/bias/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_0/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_0/intermediate/dense/bias/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/bias" + input: "bert/encoder/layer_0/intermediate/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_0/intermediate/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias" + 
} + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_0/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_0/intermediate/dense/MatMul" + input: "bert/encoder/layer_0/intermediate/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/Pow/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 3.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/Pow" + op: "Pow" + input: "bert/encoder/layer_0/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_0/intermediate/dense/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.044714998453855515 + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/mul" + op: "Mul" + input: "bert/encoder/layer_0/intermediate/dense/mul/x" + input: "bert/encoder/layer_0/intermediate/dense/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/add" + op: "Add" + input: "bert/encoder/layer_0/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_0/intermediate/dense/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/mul_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.7978845834732056 + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/mul_1" + op: "Mul" + input: "bert/encoder/layer_0/intermediate/dense/mul_1/x" + input: "bert/encoder/layer_0/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + 
size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/Tanh" + op: "Tanh" + input: "bert/encoder/layer_0/intermediate/dense/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/add_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/add_1" + op: "Add" + input: "bert/encoder/layer_0/intermediate/dense/add_1/x" + input: "bert/encoder/layer_0/intermediate/dense/Tanh" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/mul_2/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/mul_2" + op: "Mul" + input: "bert/encoder/layer_0/intermediate/dense/mul_2/x" + input: "bert/encoder/layer_0/intermediate/dense/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/mul_3" + op: "Mul" + input: "bert/encoder/layer_0/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_0/intermediate/dense/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + 
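The intermediate/dense Pow, mul, add, Tanh, mul_2 and mul_3 nodes above are the tanh approximation of GELU used by BERT: 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3))); the graph constants 0.7978845834732056 and 0.044714998453855515 are the float32 values of sqrt(2/pi) and 0.044715. A short NumPy sketch (illustrative only, not part of the diff):

import numpy as np

def gelu(x):
    # 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x**3)))
    return 0.5 * x * (1.0 + np.tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * np.power(x, 3))))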
attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_0/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_0/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_0/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_0/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_0/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/kernel" + input: "bert/encoder/layer_0/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_0/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { 
+ size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/bias" + input: "bert/encoder/layer_0/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_0/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_0/intermediate/dense/mul_3" + input: "bert/encoder/layer_0/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_0/output/dense/MatMul" + input: "bert/encoder/layer_0/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_0/output/dropout/rate" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + 
key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_0/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_0/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_0/output/dropout/random_uniform/max" + input: "bert/encoder/layer_0/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_0/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_0/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_0/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_0/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dropout/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dropout/sub" + op: "Sub" + input: "bert/encoder/layer_0/output/dropout/sub/x" + input: "bert/encoder/layer_0/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dropout/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dropout/truediv" + op: "RealDiv" + 
input: "bert/encoder/layer_0/output/dropout/truediv/x" + input: "bert/encoder/layer_0/output/dropout/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dropout/GreaterEqual" + op: "GreaterEqual" + input: "bert/encoder/layer_0/output/dropout/random_uniform" + input: "bert/encoder/layer_0/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_0/output/dense/BiasAdd" + input: "bert/encoder/layer_0/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dropout/Cast" + op: "Cast" + input: "bert/encoder/layer_0/output/dropout/GreaterEqual" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dropout/mul_1" + op: "Mul" + input: "bert/encoder/layer_0/output/dropout/mul" + input: "bert/encoder/layer_0/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/add" + op: "Add" + input: "bert/encoder/layer_0/output/dropout/mul_1" + input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/beta" + input: "bert/encoder/layer_0/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_0/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_0/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/gamma" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_0/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_0/output/add" + input: "bert/encoder/layer_0/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 
+ } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_0/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_0/output/add" + input: "bert/encoder/layer_0/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_0/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_0/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_0/output/add" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { 
+ list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_0/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_0/output/LayerNorm/beta/read" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_1/attention/self/query/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: 
"bert/encoder/layer_1/attention/self/query/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_1/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_1/attention/self/query/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_1/attention/self/query/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_1/attention/self/query/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/kernel" + input: "bert/encoder/layer_1/attention/self/query/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/self/query/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_1/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/bias" + input: "bert/encoder/layer_1/attention/self/query/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/bias/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/self/query/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/MatMul" + op: "MatMul" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_1/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_1/attention/self/query/MatMul" + input: "bert/encoder/layer_1/attention/self/query/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: 
"bert/encoder/layer_1/attention/self/key/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_1/attention/self/key/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_1/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_1/attention/self/key/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_1/attention/self/key/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_1/attention/self/key/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/kernel" + input: "bert/encoder/layer_1/attention/self/key/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr 
{ + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/self/key/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/bias" + input: "bert/encoder/layer_1/attention/self/key/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/bias/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/self/key/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/MatMul" + op: "MatMul" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_1/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_1/attention/self/key/MatMul" + input: "bert/encoder/layer_1/attention/self/key/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: 
"bert/encoder/layer_1/attention/self/value/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_1/attention/self/value/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_1/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_1/attention/self/value/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_1/attention/self/value/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_1/attention/self/value/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel" + op: "VariableV2" + attr { + key: "_class" + 
value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/kernel" + input: "bert/encoder/layer_1/attention/self/value/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/self/value/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/bias" + input: "bert/encoder/layer_1/attention/self/value/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/bias/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/self/value/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_1/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/MatMul" + op: "MatMul" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_1/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_1/attention/self/value/MatMul" + input: "bert/encoder/layer_1/attention/self/value/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/Reshape" + op: "Reshape" + input: "bert/encoder/layer_1/attention/self/query/BiasAdd" + input: "bert/encoder/layer_1/attention/self/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/transpose" + op: "Transpose" + input: "bert/encoder/layer_1/attention/self/Reshape" + input: "bert/encoder/layer_1/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/Reshape_1" + op: "Reshape" + input: 
"bert/encoder/layer_1/attention/self/key/BiasAdd" + input: "bert/encoder/layer_1/attention/self/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/transpose_1" + op: "Transpose" + input: "bert/encoder/layer_1/attention/self/Reshape_1" + input: "bert/encoder/layer_1/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/MatMul" + op: "BatchMatMulV2" + input: "bert/encoder/layer_1/attention/self/transpose" + input: "bert/encoder/layer_1/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/Mul/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.125 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/Mul" + op: "Mul" + input: "bert/encoder/layer_1/attention/self/MatMul" + input: "bert/encoder/layer_1/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/ExpandDims" + op: "ExpandDims" + input: "bert/encoder/mul" + input: "bert/encoder/layer_1/attention/self/ExpandDims/dim" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tdim" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/sub/x" + op: "Const" + attr { + key: 
"_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/sub" + op: "Sub" + input: "bert/encoder/layer_1/attention/self/sub/x" + input: "bert/encoder/layer_1/attention/self/ExpandDims" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/mul_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: -10000.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/mul_1" + op: "Mul" + input: "bert/encoder/layer_1/attention/self/sub" + input: "bert/encoder/layer_1/attention/self/mul_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/add" + op: "Add" + input: "bert/encoder/layer_1/attention/self/Mul" + input: "bert/encoder/layer_1/attention/self/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/Softmax" + op: "Softmax" + input: "bert/encoder/layer_1/attention/self/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/dropout/rate" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + 
value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_1/attention/self/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_1/attention/self/dropout/random_uniform/max" + input: "bert/encoder/layer_1/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_1/attention/self/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_1/attention/self/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_1/attention/self/dropout/random_uniform/mul" + input: "bert/encoder/layer_1/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/dropout/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/dropout/sub" + op: "Sub" + input: "bert/encoder/layer_1/attention/self/dropout/sub/x" + input: "bert/encoder/layer_1/attention/self/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/dropout/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/dropout/truediv" + op: "RealDiv" + input: "bert/encoder/layer_1/attention/self/dropout/truediv/x" + input: "bert/encoder/layer_1/attention/self/dropout/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/dropout/GreaterEqual" + op: "GreaterEqual" + input: "bert/encoder/layer_1/attention/self/dropout/random_uniform" + input: 
"bert/encoder/layer_1/attention/self/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_1/attention/self/Softmax" + input: "bert/encoder/layer_1/attention/self/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/dropout/Cast" + op: "Cast" + input: "bert/encoder/layer_1/attention/self/dropout/GreaterEqual" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/dropout/mul_1" + op: "Mul" + input: "bert/encoder/layer_1/attention/self/dropout/mul" + input: "bert/encoder/layer_1/attention/self/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/Reshape_2" + op: "Reshape" + input: "bert/encoder/layer_1/attention/self/value/BiasAdd" + input: "bert/encoder/layer_1/attention/self/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/transpose_2/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/transpose_2" + op: "Transpose" + input: "bert/encoder/layer_1/attention/self/Reshape_2" + input: "bert/encoder/layer_1/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: 
"bert/encoder/layer_1/attention/self/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/layer_1/attention/self/dropout/mul_1" + input: "bert/encoder/layer_1/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/transpose_3/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/transpose_3" + op: "Transpose" + input: "bert/encoder/layer_1/attention/self/MatMul_1" + input: "bert/encoder/layer_1/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/Reshape_3/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/Reshape_3" + op: "Reshape" + input: "bert/encoder/layer_1/attention/self/transpose_3" + input: "bert/encoder/layer_1/attention/self/Reshape_3/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + 
s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_1/attention/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_1/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_1/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_1/attention/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_1/attention/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/kernel" + input: "bert/encoder/layer_1/attention/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: 
"bert/encoder/layer_1/attention/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/bias" + input: "bert/encoder/layer_1/attention/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_1/attention/self/Reshape_3" + input: "bert/encoder/layer_1/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_1/attention/output/dense/MatMul" + input: "bert/encoder/layer_1/attention/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: 
"bert/encoder/layer_1/attention/output/dropout/rate" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_1/attention/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_1/attention/output/dropout/random_uniform/max" + input: "bert/encoder/layer_1/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_1/attention/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_1/attention/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_1/attention/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_1/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dropout/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } 
+ } +} +node { + name: "bert/encoder/layer_1/attention/output/dropout/sub" + op: "Sub" + input: "bert/encoder/layer_1/attention/output/dropout/sub/x" + input: "bert/encoder/layer_1/attention/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dropout/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dropout/truediv" + op: "RealDiv" + input: "bert/encoder/layer_1/attention/output/dropout/truediv/x" + input: "bert/encoder/layer_1/attention/output/dropout/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dropout/GreaterEqual" + op: "GreaterEqual" + input: "bert/encoder/layer_1/attention/output/dropout/random_uniform" + input: "bert/encoder/layer_1/attention/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_1/attention/output/dense/BiasAdd" + input: "bert/encoder/layer_1/attention/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dropout/Cast" + op: "Cast" + input: "bert/encoder/layer_1/attention/output/dropout/GreaterEqual" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dropout/mul_1" + op: "Mul" + input: "bert/encoder/layer_1/attention/output/dropout/mul" + input: "bert/encoder/layer_1/attention/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/add" + op: "Add" + input: "bert/encoder/layer_1/attention/output/dropout/mul_1" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + 
size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/read" + op: "Identity" + input: 
"bert/encoder/layer_1/attention/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_1/attention/output/add" + input: "bert/encoder/layer_1/attention/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_1/attention/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_1/attention/output/add" + input: "bert/encoder/layer_1/attention/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_1/attention/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add" + op: "Add" + input: 
"bert/encoder/layer_1/attention/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_1/attention/output/add" + input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_1/attention/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/read" + input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel" 
+ } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_1/intermediate/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_1/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_1/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_1/intermediate/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_1/intermediate/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_1/intermediate/dense/kernel" + input: "bert/encoder/layer_1/intermediate/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: 
"_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_1/intermediate/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/bias/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/bias/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_1/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_1/intermediate/dense/bias/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_1/intermediate/dense/bias" + input: "bert/encoder/layer_1/intermediate/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + 
name: "bert/encoder/layer_1/intermediate/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_1/intermediate/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_1/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_1/intermediate/dense/MatMul" + input: "bert/encoder/layer_1/intermediate/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/Pow/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 3.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/Pow" + op: "Pow" + input: "bert/encoder/layer_1/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_1/intermediate/dense/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.044714998453855515 + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/mul" + op: "Mul" + input: "bert/encoder/layer_1/intermediate/dense/mul/x" + input: "bert/encoder/layer_1/intermediate/dense/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/add" + op: "Add" + input: "bert/encoder/layer_1/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_1/intermediate/dense/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/mul_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.7978845834732056 + } + } + } +} +node { + name: 
"bert/encoder/layer_1/intermediate/dense/mul_1" + op: "Mul" + input: "bert/encoder/layer_1/intermediate/dense/mul_1/x" + input: "bert/encoder/layer_1/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/Tanh" + op: "Tanh" + input: "bert/encoder/layer_1/intermediate/dense/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/add_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/add_1" + op: "Add" + input: "bert/encoder/layer_1/intermediate/dense/add_1/x" + input: "bert/encoder/layer_1/intermediate/dense/Tanh" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/mul_2/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/mul_2" + op: "Mul" + input: "bert/encoder/layer_1/intermediate/dense/mul_2/x" + input: "bert/encoder/layer_1/intermediate/dense/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/mul_3" + op: "Mul" + input: "bert/encoder/layer_1/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_1/intermediate/dense/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: 
"bert/encoder/layer_1/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_1/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_1/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_1/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_1/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_1/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/kernel" + input: "bert/encoder/layer_1/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: 
"bert/encoder/layer_1/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_1/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/bias" + input: "bert/encoder/layer_1/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_1/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_1/intermediate/dense/mul_3" + input: "bert/encoder/layer_1/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_1/output/dense/MatMul" + input: "bert/encoder/layer_1/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_1/output/dropout/rate" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + 
attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_1/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_1/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_1/output/dropout/random_uniform/max" + input: "bert/encoder/layer_1/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_1/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_1/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_1/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_1/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dropout/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dropout/sub" + op: "Sub" + input: "bert/encoder/layer_1/output/dropout/sub/x" + input: "bert/encoder/layer_1/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: 
"bert/encoder/layer_1/output/dropout/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dropout/truediv" + op: "RealDiv" + input: "bert/encoder/layer_1/output/dropout/truediv/x" + input: "bert/encoder/layer_1/output/dropout/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dropout/GreaterEqual" + op: "GreaterEqual" + input: "bert/encoder/layer_1/output/dropout/random_uniform" + input: "bert/encoder/layer_1/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_1/output/dense/BiasAdd" + input: "bert/encoder/layer_1/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dropout/Cast" + op: "Cast" + input: "bert/encoder/layer_1/output/dropout/GreaterEqual" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dropout/mul_1" + op: "Mul" + input: "bert/encoder/layer_1/output/dropout/mul" + input: "bert/encoder/layer_1/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/add" + op: "Add" + input: "bert/encoder/layer_1/output/dropout/mul_1" + input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { 
+ key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_1/output/LayerNorm/beta" + input: "bert/encoder/layer_1/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_1/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_1/output/LayerNorm/gamma" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_1/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: 
"bert/encoder/layer_1/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_1/output/add" + input: "bert/encoder/layer_1/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_1/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_1/output/add" + input: "bert/encoder/layer_1/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_1/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_1/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" 
+ value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_1/output/add" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_1/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_1/output/LayerNorm/beta/read" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_2/attention/self/query/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_2/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_2/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_2/attention/self/query/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_2/attention/self/query/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_2/attention/self/query/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/kernel" + input: "bert/encoder/layer_2/attention/self/query/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/self/query/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + 
dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/bias" + input: "bert/encoder/layer_2/attention/self/query/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/bias/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/self/query/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/MatMul" + op: "MatMul" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_2/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_2/attention/self/query/MatMul" + input: "bert/encoder/layer_2/attention/self/query/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_2/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_2/attention/self/key/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_2/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_2/attention/self/key/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_2/attention/self/key/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_2/attention/self/key/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/kernel" + input: "bert/encoder/layer_2/attention/self/key/kernel/Initializer/truncated_normal" + attr { + key: "T" + 
value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/self/key/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/bias" + input: "bert/encoder/layer_2/attention/self/key/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/bias/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/self/key/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/MatMul" + op: "MatMul" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_2/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/BiasAdd" + op: "BiasAdd" + input: 
"bert/encoder/layer_2/attention/self/key/MatMul" + input: "bert/encoder/layer_2/attention/self/key/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_2/attention/self/value/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_2/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_2/attention/self/value/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_2/attention/self/value/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_2/attention/self/value/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT 
+ } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/kernel" + input: "bert/encoder/layer_2/attention/self/value/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/self/value/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/bias" + input: "bert/encoder/layer_2/attention/self/value/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: 
"use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/bias/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/self/value/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/MatMul" + op: "MatMul" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_2/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_2/attention/self/value/MatMul" + input: "bert/encoder/layer_2/attention/self/value/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/Reshape" + op: "Reshape" + input: "bert/encoder/layer_2/attention/self/query/BiasAdd" + input: "bert/encoder/layer_2/attention/self/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/transpose" + op: "Transpose" + input: "bert/encoder/layer_2/attention/self/Reshape" + input: "bert/encoder/layer_2/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + 
key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/Reshape_1" + op: "Reshape" + input: "bert/encoder/layer_2/attention/self/key/BiasAdd" + input: "bert/encoder/layer_2/attention/self/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/transpose_1" + op: "Transpose" + input: "bert/encoder/layer_2/attention/self/Reshape_1" + input: "bert/encoder/layer_2/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/MatMul" + op: "BatchMatMulV2" + input: "bert/encoder/layer_2/attention/self/transpose" + input: "bert/encoder/layer_2/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/Mul/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.125 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/Mul" + op: "Mul" + input: "bert/encoder/layer_2/attention/self/MatMul" + input: "bert/encoder/layer_2/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/ExpandDims" + op: "ExpandDims" + input: "bert/encoder/mul" + input: "bert/encoder/layer_2/attention/self/ExpandDims/dim" + attr { + key: "T" + value { + type: DT_FLOAT + } + 
} + attr { + key: "Tdim" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/sub" + op: "Sub" + input: "bert/encoder/layer_2/attention/self/sub/x" + input: "bert/encoder/layer_2/attention/self/ExpandDims" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/mul_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: -10000.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/mul_1" + op: "Mul" + input: "bert/encoder/layer_2/attention/self/sub" + input: "bert/encoder/layer_2/attention/self/mul_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/add" + op: "Add" + input: "bert/encoder/layer_2/attention/self/Mul" + input: "bert/encoder/layer_2/attention/self/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/Softmax" + op: "Softmax" + input: "bert/encoder/layer_2/attention/self/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/dropout/rate" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: 
DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_2/attention/self/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_2/attention/self/dropout/random_uniform/max" + input: "bert/encoder/layer_2/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_2/attention/self/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_2/attention/self/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_2/attention/self/dropout/random_uniform/mul" + input: "bert/encoder/layer_2/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/dropout/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/dropout/sub" + op: "Sub" + input: "bert/encoder/layer_2/attention/self/dropout/sub/x" + input: "bert/encoder/layer_2/attention/self/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/dropout/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/dropout/truediv" + op: "RealDiv" + input: "bert/encoder/layer_2/attention/self/dropout/truediv/x" + input: "bert/encoder/layer_2/attention/self/dropout/sub" + attr { + key: "T" + value { + type: 
DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/dropout/GreaterEqual" + op: "GreaterEqual" + input: "bert/encoder/layer_2/attention/self/dropout/random_uniform" + input: "bert/encoder/layer_2/attention/self/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_2/attention/self/Softmax" + input: "bert/encoder/layer_2/attention/self/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/dropout/Cast" + op: "Cast" + input: "bert/encoder/layer_2/attention/self/dropout/GreaterEqual" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/dropout/mul_1" + op: "Mul" + input: "bert/encoder/layer_2/attention/self/dropout/mul" + input: "bert/encoder/layer_2/attention/self/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/Reshape_2" + op: "Reshape" + input: "bert/encoder/layer_2/attention/self/value/BiasAdd" + input: "bert/encoder/layer_2/attention/self/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/transpose_2/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/transpose_2" + op: "Transpose" + input: "bert/encoder/layer_2/attention/self/Reshape_2" + input: "bert/encoder/layer_2/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + 
attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/layer_2/attention/self/dropout/mul_1" + input: "bert/encoder/layer_2/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/transpose_3/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/transpose_3" + op: "Transpose" + input: "bert/encoder/layer_2/attention/self/MatMul_1" + input: "bert/encoder/layer_2/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/Reshape_3/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/Reshape_3" + op: "Reshape" + input: "bert/encoder/layer_2/attention/self/transpose_3" + input: "bert/encoder/layer_2/attention/self/Reshape_3/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { 
+ tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_2/attention/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_2/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_2/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_2/attention/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_2/attention/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/kernel" + input: "bert/encoder/layer_2/attention/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/bias" + input: "bert/encoder/layer_2/attention/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_2/attention/self/Reshape_3" + input: "bert/encoder/layer_2/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_2/attention/output/dense/MatMul" + input: "bert/encoder/layer_2/attention/output/dense/bias/read" + attr { + key: "T" 
+ value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dropout/rate" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_2/attention/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_2/attention/output/dropout/random_uniform/max" + input: "bert/encoder/layer_2/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_2/attention/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_2/attention/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_2/attention/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_2/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dropout/sub/x" + op: "Const" + 
attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dropout/sub" + op: "Sub" + input: "bert/encoder/layer_2/attention/output/dropout/sub/x" + input: "bert/encoder/layer_2/attention/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dropout/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dropout/truediv" + op: "RealDiv" + input: "bert/encoder/layer_2/attention/output/dropout/truediv/x" + input: "bert/encoder/layer_2/attention/output/dropout/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dropout/GreaterEqual" + op: "GreaterEqual" + input: "bert/encoder/layer_2/attention/output/dropout/random_uniform" + input: "bert/encoder/layer_2/attention/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_2/attention/output/dense/BiasAdd" + input: "bert/encoder/layer_2/attention/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dropout/Cast" + op: "Cast" + input: "bert/encoder/layer_2/attention/output/dropout/GreaterEqual" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dropout/mul_1" + op: "Mul" + input: "bert/encoder/layer_2/attention/output/dropout/mul" + input: "bert/encoder/layer_2/attention/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/add" + op: "Add" + input: "bert/encoder/layer_2/attention/output/dropout/mul_1" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta" + } + } + } + attr 
{ + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + 
attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_2/attention/output/add" + input: "bert/encoder/layer_2/attention/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_2/attention/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_2/attention/output/add" + input: "bert/encoder/layer_2/attention/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_2/attention/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { 
+ dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_2/attention/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_2/attention/output/add" + input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_2/attention/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/read" + input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: 
"bert/encoder/layer_2/intermediate/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_2/intermediate/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_2/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_2/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_2/intermediate/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_2/intermediate/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel/Assign" + op: "Assign" + input: 
"bert/encoder/layer_2/intermediate/dense/kernel" + input: "bert/encoder/layer_2/intermediate/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_2/intermediate/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/bias/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/bias/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_2/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_2/intermediate/dense/bias/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_2/intermediate/dense/bias" + input: "bert/encoder/layer_2/intermediate/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_2/intermediate/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_2/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_2/intermediate/dense/MatMul" + input: "bert/encoder/layer_2/intermediate/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/Pow/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 3.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/Pow" + op: "Pow" + input: "bert/encoder/layer_2/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_2/intermediate/dense/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.044714998453855515 + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/mul" + op: "Mul" + input: "bert/encoder/layer_2/intermediate/dense/mul/x" + input: "bert/encoder/layer_2/intermediate/dense/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/add" + op: "Add" + input: "bert/encoder/layer_2/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_2/intermediate/dense/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/mul_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + 
attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.7978845834732056 + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/mul_1" + op: "Mul" + input: "bert/encoder/layer_2/intermediate/dense/mul_1/x" + input: "bert/encoder/layer_2/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/Tanh" + op: "Tanh" + input: "bert/encoder/layer_2/intermediate/dense/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/add_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/add_1" + op: "Add" + input: "bert/encoder/layer_2/intermediate/dense/add_1/x" + input: "bert/encoder/layer_2/intermediate/dense/Tanh" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/mul_2/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/mul_2" + op: "Mul" + input: "bert/encoder/layer_2/intermediate/dense/mul_2/x" + input: "bert/encoder/layer_2/intermediate/dense/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/mul_3" + op: "Mul" + input: "bert/encoder/layer_2/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_2/intermediate/dense/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + 
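+# Annotation: mul_1 scales by sqrt(2/pi) ~= 0.7978846, then Tanh, add 1.0, multiply by 0.5, and
+# mul_3 (times the pre-activation) complete GELU: 0.5*x*(1 + tanh(sqrt(2/pi)*(x + 0.044715*x^3))).
+# The truncated_normal nodes below initialize the layer_2 output projection kernel ([3072, 768]).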
value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_2/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_2/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_2/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_2/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_2/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_2/output/dense/kernel" + input: "bert/encoder/layer_2/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { 
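+# Annotation: standard BERT weight init: TruncatedNormal(shape) * stddev 0.02 + mean 0.0
+# (initializer_range from bert_config.json); VariableV2/Assign/read is the usual TF1 variable
+# pattern and repeats for every kernel in the graph (biases are zero-initialized).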
+ key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_2/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_2/output/dense/bias" + input: "bert/encoder/layer_2/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_2/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_2/intermediate/dense/mul_3" + input: "bert/encoder/layer_2/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_2/output/dense/MatMul" + input: "bert/encoder/layer_2/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_2/output/dropout/rate" + op: "Const" + attr { 
+ key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_2/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_2/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_2/output/dropout/random_uniform/max" + input: "bert/encoder/layer_2/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_2/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_2/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_2/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_2/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dropout/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dropout/sub" + op: "Sub" + input: "bert/encoder/layer_2/output/dropout/sub/x" + input: "bert/encoder/layer_2/output/dropout/rate" + attr { + key: "T" + value { + type: 
DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dropout/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dropout/truediv" + op: "RealDiv" + input: "bert/encoder/layer_2/output/dropout/truediv/x" + input: "bert/encoder/layer_2/output/dropout/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dropout/GreaterEqual" + op: "GreaterEqual" + input: "bert/encoder/layer_2/output/dropout/random_uniform" + input: "bert/encoder/layer_2/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_2/output/dense/BiasAdd" + input: "bert/encoder/layer_2/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dropout/Cast" + op: "Cast" + input: "bert/encoder/layer_2/output/dropout/GreaterEqual" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dropout/mul_1" + op: "Mul" + input: "bert/encoder/layer_2/output/dropout/mul" + input: "bert/encoder/layer_2/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/add" + op: "Add" + input: "bert/encoder/layer_2/output/dropout/mul_1" + input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { 
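+# Annotation: inverted dropout: keep mask = (uniform >= rate), kept values scaled by 1/(1 - 0.1);
+# output/add then applies the residual connection, adding back the attention sub-layer's
+# LayerNorm output before the block's own LayerNorm.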
+ type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/beta" + input: "bert/encoder/layer_2/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_2/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/gamma" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_2/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: 
DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_2/output/add" + input: "bert/encoder/layer_2/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_2/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_2/output/add" + input: "bert/encoder/layer_2/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_2/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_2/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/Rsqrt" + input: 
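+# Annotation: LayerNorm over the 768 features of each token: moments/mean and variance reduce
+# axis 1 with keep_dims, an epsilon of ~1e-12 is added before Rsqrt, and the batchnorm/* nodes
+# below scale by gamma and shift by beta.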
"bert/encoder/layer_2/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_2/output/add" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_2/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_2/output/LayerNorm/beta/read" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: 
"bert/encoder/layer_3/attention/self/query/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_3/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_3/attention/self/query/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_3/attention/self/query/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_3/attention/self/query/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/query/kernel" + input: "bert/encoder/layer_3/attention/self/query/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/self/query/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/bias/Initializer/zeros" + op: 
"Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/query/bias" + input: "bert/encoder/layer_3/attention/self/query/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/bias/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/self/query/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/MatMul" + op: "MatMul" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_3/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_3/attention/self/query/MatMul" + input: "bert/encoder/layer_3/attention/self/query/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + 
name: "bert/encoder/layer_3/attention/self/key/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_3/attention/self/key/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_3/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_3/attention/self/key/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_3/attention/self/key/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_3/attention/self/key/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel/Assign" + op: "Assign" + input: 
"bert/encoder/layer_3/attention/self/key/kernel" + input: "bert/encoder/layer_3/attention/self/key/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/self/key/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/key/bias" + input: "bert/encoder/layer_3/attention/self/key/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/bias/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/self/key/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/MatMul" + op: "MatMul" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_3/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: 
"transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_3/attention/self/key/MatMul" + input: "bert/encoder/layer_3/attention/self/key/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_3/attention/self/value/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_3/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_3/attention/self/value/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_3/attention/self/value/kernel/Initializer/truncated_normal/mul" + 
input: "bert/encoder/layer_3/attention/self/value/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/value/kernel" + input: "bert/encoder/layer_3/attention/self/value/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/self/value/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/value/bias" + input: "bert/encoder/layer_3/attention/self/value/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias" + } 
+ } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/bias/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/self/value/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/MatMul" + op: "MatMul" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_3/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_3/attention/self/value/MatMul" + input: "bert/encoder/layer_3/attention/self/value/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/Reshape" + op: "Reshape" + input: "bert/encoder/layer_3/attention/self/query/BiasAdd" + input: "bert/encoder/layer_3/attention/self/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/transpose" + op: "Transpose" + input: "bert/encoder/layer_3/attention/self/Reshape" + input: "bert/encoder/layer_3/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/Reshape_1/shape" 
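+# Annotation: Reshape splits the 768-dim query into [batch 32, seq 128, heads 12, head_size 64];
+# transpose with perm [0, 2, 1, 3] gives [batch, heads, seq, head_size] so each of the 12 heads
+# attends independently.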
+ op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/Reshape_1" + op: "Reshape" + input: "bert/encoder/layer_3/attention/self/key/BiasAdd" + input: "bert/encoder/layer_3/attention/self/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/transpose_1" + op: "Transpose" + input: "bert/encoder/layer_3/attention/self/Reshape_1" + input: "bert/encoder/layer_3/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/MatMul" + op: "BatchMatMulV2" + input: "bert/encoder/layer_3/attention/self/transpose" + input: "bert/encoder/layer_3/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/Mul/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.125 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/Mul" + op: "Mul" + input: "bert/encoder/layer_3/attention/self/MatMul" + input: "bert/encoder/layer_3/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/ExpandDims" + op: "ExpandDims" + input: 
"bert/encoder/mul" + input: "bert/encoder/layer_3/attention/self/ExpandDims/dim" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tdim" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/sub" + op: "Sub" + input: "bert/encoder/layer_3/attention/self/sub/x" + input: "bert/encoder/layer_3/attention/self/ExpandDims" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/mul_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: -10000.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/mul_1" + op: "Mul" + input: "bert/encoder/layer_3/attention/self/sub" + input: "bert/encoder/layer_3/attention/self/mul_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/add" + op: "Add" + input: "bert/encoder/layer_3/attention/self/Mul" + input: "bert/encoder/layer_3/attention/self/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/Softmax" + op: "Softmax" + input: "bert/encoder/layer_3/attention/self/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/dropout/rate" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + 
shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_3/attention/self/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_3/attention/self/dropout/random_uniform/max" + input: "bert/encoder/layer_3/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_3/attention/self/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_3/attention/self/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_3/attention/self/dropout/random_uniform/mul" + input: "bert/encoder/layer_3/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/dropout/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/dropout/sub" + op: "Sub" + input: "bert/encoder/layer_3/attention/self/dropout/sub/x" + input: "bert/encoder/layer_3/attention/self/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/dropout/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/dropout/truediv" + op: "RealDiv" + input: 
"bert/encoder/layer_3/attention/self/dropout/truediv/x" + input: "bert/encoder/layer_3/attention/self/dropout/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/dropout/GreaterEqual" + op: "GreaterEqual" + input: "bert/encoder/layer_3/attention/self/dropout/random_uniform" + input: "bert/encoder/layer_3/attention/self/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_3/attention/self/Softmax" + input: "bert/encoder/layer_3/attention/self/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/dropout/Cast" + op: "Cast" + input: "bert/encoder/layer_3/attention/self/dropout/GreaterEqual" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/dropout/mul_1" + op: "Mul" + input: "bert/encoder/layer_3/attention/self/dropout/mul" + input: "bert/encoder/layer_3/attention/self/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/Reshape_2" + op: "Reshape" + input: "bert/encoder/layer_3/attention/self/value/BiasAdd" + input: "bert/encoder/layer_3/attention/self/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/transpose_2/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/transpose_2" + op: "Transpose" + input: 
"bert/encoder/layer_3/attention/self/Reshape_2" + input: "bert/encoder/layer_3/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/layer_3/attention/self/dropout/mul_1" + input: "bert/encoder/layer_3/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/transpose_3/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/transpose_3" + op: "Transpose" + input: "bert/encoder/layer_3/attention/self/MatMul_1" + input: "bert/encoder/layer_3/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/Reshape_3/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/Reshape_3" + op: "Reshape" + input: "bert/encoder/layer_3/attention/self/transpose_3" + input: "bert/encoder/layer_3/attention/self/Reshape_3/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel" + } + } + } + 
attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_3/attention/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_3/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_3/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_3/attention/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_3/attention/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/kernel" + input: "bert/encoder/layer_3/attention/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + 
value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/bias" + input: "bert/encoder/layer_3/attention/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_3/attention/self/Reshape_3" + input: "bert/encoder/layer_3/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: 
"bert/encoder/layer_3/attention/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_3/attention/output/dense/MatMul" + input: "bert/encoder/layer_3/attention/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dropout/rate" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_3/attention/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_3/attention/output/dropout/random_uniform/max" + input: "bert/encoder/layer_3/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_3/attention/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_3/attention/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_3/attention/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_3/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + 
attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dropout/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dropout/sub" + op: "Sub" + input: "bert/encoder/layer_3/attention/output/dropout/sub/x" + input: "bert/encoder/layer_3/attention/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dropout/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dropout/truediv" + op: "RealDiv" + input: "bert/encoder/layer_3/attention/output/dropout/truediv/x" + input: "bert/encoder/layer_3/attention/output/dropout/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dropout/GreaterEqual" + op: "GreaterEqual" + input: "bert/encoder/layer_3/attention/output/dropout/random_uniform" + input: "bert/encoder/layer_3/attention/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_3/attention/output/dense/BiasAdd" + input: "bert/encoder/layer_3/attention/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dropout/Cast" + op: "Cast" + input: "bert/encoder/layer_3/attention/output/dropout/GreaterEqual" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dropout/mul_1" + op: "Mul" + input: "bert/encoder/layer_3/attention/output/dropout/mul" + input: "bert/encoder/layer_3/attention/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/add" + op: "Add" + input: "bert/encoder/layer_3/attention/output/dropout/mul_1" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: 
"bert/encoder/layer_3/attention/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + 
key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_3/attention/output/add" + input: "bert/encoder/layer_3/attention/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_3/attention/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_3/attention/output/add" + input: "bert/encoder/layer_3/attention/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_3/attention/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: 
"bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_3/attention/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_3/attention/output/add" + input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_3/attention/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/read" + input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + 
} + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_3/intermediate/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_3/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_3/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_3/intermediate/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_3/intermediate/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: 
"shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/kernel" + input: "bert/encoder/layer_3/intermediate/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_3/intermediate/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/bias/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/bias/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_3/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_3/intermediate/dense/bias/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/bias" + input: 
"bert/encoder/layer_3/intermediate/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_3/intermediate/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_3/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_3/intermediate/dense/MatMul" + input: "bert/encoder/layer_3/intermediate/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/Pow/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 3.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/Pow" + op: "Pow" + input: "bert/encoder/layer_3/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_3/intermediate/dense/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.044714998453855515 + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/mul" + op: "Mul" + input: "bert/encoder/layer_3/intermediate/dense/mul/x" + input: "bert/encoder/layer_3/intermediate/dense/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/add" + op: "Add" + input: "bert/encoder/layer_3/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_3/intermediate/dense/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { 
+ list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/mul_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.7978845834732056 + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/mul_1" + op: "Mul" + input: "bert/encoder/layer_3/intermediate/dense/mul_1/x" + input: "bert/encoder/layer_3/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/Tanh" + op: "Tanh" + input: "bert/encoder/layer_3/intermediate/dense/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/add_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/add_1" + op: "Add" + input: "bert/encoder/layer_3/intermediate/dense/add_1/x" + input: "bert/encoder/layer_3/intermediate/dense/Tanh" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/mul_2/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/mul_2" + op: "Mul" + input: "bert/encoder/layer_3/intermediate/dense/mul_2/x" + input: "bert/encoder/layer_3/intermediate/dense/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/mul_3" + op: "Mul" + input: "bert/encoder/layer_3/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_3/intermediate/dense/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: 
"bert/encoder/layer_3/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_3/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_3/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_3/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_3/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_3/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_3/output/dense/kernel" + input: 
"bert/encoder/layer_3/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_3/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_3/output/dense/bias" + input: "bert/encoder/layer_3/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_3/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_3/intermediate/dense/mul_3" + input: "bert/encoder/layer_3/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_3/output/dense/MatMul" + input: 
"bert/encoder/layer_3/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_3/output/dropout/rate" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_3/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_3/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_3/output/dropout/random_uniform/max" + input: "bert/encoder/layer_3/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_3/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_3/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_3/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_3/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dropout/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: 
"dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dropout/sub" + op: "Sub" + input: "bert/encoder/layer_3/output/dropout/sub/x" + input: "bert/encoder/layer_3/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dropout/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dropout/truediv" + op: "RealDiv" + input: "bert/encoder/layer_3/output/dropout/truediv/x" + input: "bert/encoder/layer_3/output/dropout/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dropout/GreaterEqual" + op: "GreaterEqual" + input: "bert/encoder/layer_3/output/dropout/random_uniform" + input: "bert/encoder/layer_3/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_3/output/dense/BiasAdd" + input: "bert/encoder/layer_3/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dropout/Cast" + op: "Cast" + input: "bert/encoder/layer_3/output/dropout/GreaterEqual" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dropout/mul_1" + op: "Mul" + input: "bert/encoder/layer_3/output/dropout/mul" + input: "bert/encoder/layer_3/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/add" + op: "Add" + input: "bert/encoder/layer_3/output/dropout/mul_1" + input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { 
+ name: "bert/encoder/layer_3/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/beta" + input: "bert/encoder/layer_3/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_3/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/gamma" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_3/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list 
{ + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_3/output/add" + input: "bert/encoder/layer_3/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_3/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_3/output/add" + input: "bert/encoder/layer_3/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_3/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_3/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/add" + attr { + 
key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_3/output/add" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_3/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_3/output/LayerNorm/beta/read" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + 
key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_4/attention/self/query/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_4/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_4/attention/self/query/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_4/attention/self/query/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_4/attention/self/query/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/kernel" + input: "bert/encoder/layer_4/attention/self/query/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/self/query/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_4/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/bias" + input: "bert/encoder/layer_4/attention/self/query/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/bias/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/self/query/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/MatMul" + op: "MatMul" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_4/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_4/attention/self/query/MatMul" + input: "bert/encoder/layer_4/attention/self/query/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape 
{ + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_4/attention/self/key/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_4/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_4/attention/self/key/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_4/attention/self/key/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_4/attention/self/key/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: 
DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/kernel" + input: "bert/encoder/layer_4/attention/self/key/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/self/key/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/bias" + input: "bert/encoder/layer_4/attention/self/key/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/bias/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/self/key/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/MatMul" + op: "MatMul" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" + input: 
"bert/encoder/layer_4/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_4/attention/self/key/MatMul" + input: "bert/encoder/layer_4/attention/self/key/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_4/attention/self/value/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_4/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_4/attention/self/value/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_4/attention/self/value/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_4/attention/self/value/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/kernel" + input: "bert/encoder/layer_4/attention/self/value/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/self/value/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: 
"bert/encoder/layer_4/attention/self/value/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/bias" + input: "bert/encoder/layer_4/attention/self/value/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/bias/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/self/value/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/MatMul" + op: "MatMul" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_4/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_4/attention/self/value/MatMul" + input: "bert/encoder/layer_4/attention/self/value/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/Reshape" + op: "Reshape" + input: "bert/encoder/layer_4/attention/self/query/BiasAdd" + input: "bert/encoder/layer_4/attention/self/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/transpose" + op: "Transpose" + input: "bert/encoder/layer_4/attention/self/Reshape" + input: 
"bert/encoder/layer_4/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/Reshape_1" + op: "Reshape" + input: "bert/encoder/layer_4/attention/self/key/BiasAdd" + input: "bert/encoder/layer_4/attention/self/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/transpose_1" + op: "Transpose" + input: "bert/encoder/layer_4/attention/self/Reshape_1" + input: "bert/encoder/layer_4/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/MatMul" + op: "BatchMatMulV2" + input: "bert/encoder/layer_4/attention/self/transpose" + input: "bert/encoder/layer_4/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/Mul/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.125 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/Mul" + op: "Mul" + input: "bert/encoder/layer_4/attention/self/MatMul" + input: "bert/encoder/layer_4/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: 
"bert/encoder/layer_4/attention/self/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/ExpandDims" + op: "ExpandDims" + input: "bert/encoder/mul" + input: "bert/encoder/layer_4/attention/self/ExpandDims/dim" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tdim" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/sub" + op: "Sub" + input: "bert/encoder/layer_4/attention/self/sub/x" + input: "bert/encoder/layer_4/attention/self/ExpandDims" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/mul_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: -10000.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/mul_1" + op: "Mul" + input: "bert/encoder/layer_4/attention/self/sub" + input: "bert/encoder/layer_4/attention/self/mul_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/add" + op: "Add" + input: "bert/encoder/layer_4/attention/self/Mul" + input: "bert/encoder/layer_4/attention/self/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/Softmax" + op: "Softmax" + input: "bert/encoder/layer_4/attention/self/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/dropout/rate" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape 
{ + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_4/attention/self/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_4/attention/self/dropout/random_uniform/max" + input: "bert/encoder/layer_4/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_4/attention/self/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_4/attention/self/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_4/attention/self/dropout/random_uniform/mul" + input: "bert/encoder/layer_4/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/dropout/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/dropout/sub" + op: "Sub" + input: "bert/encoder/layer_4/attention/self/dropout/sub/x" + input: "bert/encoder/layer_4/attention/self/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} 
+node { + name: "bert/encoder/layer_4/attention/self/dropout/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/dropout/truediv" + op: "RealDiv" + input: "bert/encoder/layer_4/attention/self/dropout/truediv/x" + input: "bert/encoder/layer_4/attention/self/dropout/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/dropout/GreaterEqual" + op: "GreaterEqual" + input: "bert/encoder/layer_4/attention/self/dropout/random_uniform" + input: "bert/encoder/layer_4/attention/self/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_4/attention/self/Softmax" + input: "bert/encoder/layer_4/attention/self/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/dropout/Cast" + op: "Cast" + input: "bert/encoder/layer_4/attention/self/dropout/GreaterEqual" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/dropout/mul_1" + op: "Mul" + input: "bert/encoder/layer_4/attention/self/dropout/mul" + input: "bert/encoder/layer_4/attention/self/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/Reshape_2" + op: "Reshape" + input: "bert/encoder/layer_4/attention/self/value/BiasAdd" + input: "bert/encoder/layer_4/attention/self/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/transpose_2/perm" + op: "Const" + attr { + key: "_output_shapes" + value { 
+ list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/transpose_2" + op: "Transpose" + input: "bert/encoder/layer_4/attention/self/Reshape_2" + input: "bert/encoder/layer_4/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/layer_4/attention/self/dropout/mul_1" + input: "bert/encoder/layer_4/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/transpose_3/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/transpose_3" + op: "Transpose" + input: "bert/encoder/layer_4/attention/self/MatMul_1" + input: "bert/encoder/layer_4/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/Reshape_3/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/Reshape_3" + op: "Reshape" + input: "bert/encoder/layer_4/attention/self/transpose_3" + input: "bert/encoder/layer_4/attention/self/Reshape_3/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + 
} + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_4/attention/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_4/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_4/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_4/attention/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_4/attention/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: 
"shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/kernel" + input: "bert/encoder/layer_4/attention/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/bias" + input: "bert/encoder/layer_4/attention/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_4/attention/self/Reshape_3" + input: 
"bert/encoder/layer_4/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_4/attention/output/dense/MatMul" + input: "bert/encoder/layer_4/attention/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dropout/rate" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_4/attention/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_4/attention/output/dropout/random_uniform/max" + input: "bert/encoder/layer_4/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_4/attention/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_4/attention/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { 
+ shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_4/attention/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_4/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dropout/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dropout/sub" + op: "Sub" + input: "bert/encoder/layer_4/attention/output/dropout/sub/x" + input: "bert/encoder/layer_4/attention/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dropout/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dropout/truediv" + op: "RealDiv" + input: "bert/encoder/layer_4/attention/output/dropout/truediv/x" + input: "bert/encoder/layer_4/attention/output/dropout/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dropout/GreaterEqual" + op: "GreaterEqual" + input: "bert/encoder/layer_4/attention/output/dropout/random_uniform" + input: "bert/encoder/layer_4/attention/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_4/attention/output/dense/BiasAdd" + input: "bert/encoder/layer_4/attention/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dropout/Cast" + op: "Cast" + input: "bert/encoder/layer_4/attention/output/dropout/GreaterEqual" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dropout/mul_1" + op: "Mul" + input: "bert/encoder/layer_4/attention/output/dropout/mul" + input: "bert/encoder/layer_4/attention/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: 
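[Note] The dropout/* nodes above unroll inverted dropout with rate 0.1: draw a uniform tensor over the activation shape, keep the positions where the sample is >= rate, and rescale kept activations by 1/(1 - rate) so the expected value is unchanged. A minimal sketch of the same arithmetic (shape and rate taken from the constants above; the helper name is illustrative):

    import tensorflow as tf

    def inverted_dropout(x, rate=0.1, shape=(4096, 768)):
        # keep where a uniform sample is >= rate, i.e. keep probability 1 - rate
        keep_mask = tf.cast(tf.random_uniform(shape) >= rate, tf.float32)
        # scale by 1/(1 - rate) (the truediv node), then zero out dropped units
        return x * (1.0 / (1.0 - rate)) * keep_mask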
"bert/encoder/layer_4/attention/output/add" + op: "Add" + input: "bert/encoder/layer_4/attention/output/dropout/mul_1" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 
+ } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_4/attention/output/add" + input: "bert/encoder/layer_4/attention/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_4/attention/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_4/attention/output/add" + input: "bert/encoder/layer_4/attention/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference" + input: 
"bert/encoder/layer_4/attention/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_4/attention/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_4/attention/output/add" + input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_4/attention/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/read" + input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + 
shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_4/intermediate/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_4/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_4/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_4/intermediate/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_4/intermediate/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: 
"bert/encoder/layer_4/intermediate/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/kernel" + input: "bert/encoder/layer_4/intermediate/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_4/intermediate/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/bias/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/bias/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_4/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_4/intermediate/dense/bias/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } 
+ } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/bias" + input: "bert/encoder/layer_4/intermediate/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_4/intermediate/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_4/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_4/intermediate/dense/MatMul" + input: "bert/encoder/layer_4/intermediate/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/Pow/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 3.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/Pow" + op: "Pow" + input: "bert/encoder/layer_4/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_4/intermediate/dense/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.044714998453855515 + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/mul" + op: "Mul" + input: "bert/encoder/layer_4/intermediate/dense/mul/x" + input: "bert/encoder/layer_4/intermediate/dense/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + 
} + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/add" + op: "Add" + input: "bert/encoder/layer_4/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_4/intermediate/dense/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/mul_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.7978845834732056 + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/mul_1" + op: "Mul" + input: "bert/encoder/layer_4/intermediate/dense/mul_1/x" + input: "bert/encoder/layer_4/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/Tanh" + op: "Tanh" + input: "bert/encoder/layer_4/intermediate/dense/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/add_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/add_1" + op: "Add" + input: "bert/encoder/layer_4/intermediate/dense/add_1/x" + input: "bert/encoder/layer_4/intermediate/dense/Tanh" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/mul_2/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/mul_2" + op: "Mul" + input: "bert/encoder/layer_4/intermediate/dense/mul_2/x" + input: "bert/encoder/layer_4/intermediate/dense/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/mul_3" + op: "Mul" + input: "bert/encoder/layer_4/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_4/intermediate/dense/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel" 
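[Note] The Pow/mul/add/mul_1/Tanh/add_1/mul_2/mul_3 chain above is the tanh approximation of GELU used as the intermediate activation: 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3))), with sqrt(2/pi) appearing as the 0.7978845834732056 constant. A direct transcription of those ops (illustrative helper name):

    import tensorflow as tf

    def gelu(x):
        # tanh-based GELU approximation, constants as in the graph nodes above
        cdf = 0.5 * (1.0 + tf.tanh(0.7978845608 * (x + 0.044715 * tf.pow(x, 3.0))))
        return x * cdf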
+ } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_4/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_4/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_4/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_4/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_4/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + 
key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/kernel" + input: "bert/encoder/layer_4/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_4/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/bias" + input: "bert/encoder/layer_4/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_4/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_4/intermediate/dense/mul_3" + input: "bert/encoder/layer_4/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + 
dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_4/output/dense/MatMul" + input: "bert/encoder/layer_4/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_4/output/dropout/rate" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_4/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_4/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_4/output/dropout/random_uniform/max" + input: "bert/encoder/layer_4/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_4/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_4/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_4/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_4/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr 
{ + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dropout/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dropout/sub" + op: "Sub" + input: "bert/encoder/layer_4/output/dropout/sub/x" + input: "bert/encoder/layer_4/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dropout/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dropout/truediv" + op: "RealDiv" + input: "bert/encoder/layer_4/output/dropout/truediv/x" + input: "bert/encoder/layer_4/output/dropout/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dropout/GreaterEqual" + op: "GreaterEqual" + input: "bert/encoder/layer_4/output/dropout/random_uniform" + input: "bert/encoder/layer_4/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_4/output/dense/BiasAdd" + input: "bert/encoder/layer_4/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dropout/Cast" + op: "Cast" + input: "bert/encoder/layer_4/output/dropout/GreaterEqual" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dropout/mul_1" + op: "Mul" + input: "bert/encoder/layer_4/output/dropout/mul" + input: "bert/encoder/layer_4/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/add" + op: "Add" + input: "bert/encoder/layer_4/output/dropout/mul_1" + input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_4/output/LayerNorm/beta" + input: "bert/encoder/layer_4/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_4/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_4/output/LayerNorm/gamma" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: 
"bert/encoder/layer_4/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_4/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_4/output/add" + input: "bert/encoder/layer_4/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_4/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_4/output/add" + input: "bert/encoder/layer_4/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_4/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_4/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/add/y" + 
attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_4/output/add" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_4/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_4/output/LayerNorm/beta/read" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: 
"bert/encoder/layer_5/attention/self/query/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_5/attention/self/query/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_5/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_5/attention/self/query/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_5/attention/self/query/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_5/attention/self/query/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/kernel" + input: "bert/encoder/layer_5/attention/self/query/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: 
"use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/self/query/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/bias" + input: "bert/encoder/layer_5/attention/self/query/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/bias/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/self/query/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/MatMul" + op: "MatMul" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_5/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_5/attention/self/query/MatMul" + input: "bert/encoder/layer_5/attention/self/query/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + 
} + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_5/attention/self/key/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_5/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_5/attention/self/key/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_5/attention/self/key/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_5/attention/self/key/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel" + 
op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/key/kernel" + input: "bert/encoder/layer_5/attention/self/key/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/self/key/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/key/bias" + input: "bert/encoder/layer_5/attention/self/key/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/bias/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/self/key/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_5/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/MatMul" + op: "MatMul" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_5/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_5/attention/self/key/MatMul" + input: "bert/encoder/layer_5/attention/self/key/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_5/attention/self/value/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: 
"bert/encoder/layer_5/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_5/attention/self/value/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_5/attention/self/value/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_5/attention/self/value/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/kernel" + input: "bert/encoder/layer_5/attention/self/value/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/self/value/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value 
{ + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/bias" + input: "bert/encoder/layer_5/attention/self/value/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/bias/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/self/value/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/MatMul" + op: "MatMul" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_5/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_5/attention/self/value/MatMul" + input: "bert/encoder/layer_5/attention/self/value/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/Reshape" + op: "Reshape" + input: "bert/encoder/layer_5/attention/self/query/BiasAdd" + input: "bert/encoder/layer_5/attention/self/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + 
tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/transpose" + op: "Transpose" + input: "bert/encoder/layer_5/attention/self/Reshape" + input: "bert/encoder/layer_5/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/Reshape_1" + op: "Reshape" + input: "bert/encoder/layer_5/attention/self/key/BiasAdd" + input: "bert/encoder/layer_5/attention/self/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/transpose_1" + op: "Transpose" + input: "bert/encoder/layer_5/attention/self/Reshape_1" + input: "bert/encoder/layer_5/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/MatMul" + op: "BatchMatMulV2" + input: "bert/encoder/layer_5/attention/self/transpose" + input: "bert/encoder/layer_5/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/Mul/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.125 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/Mul" + op: "Mul" + input: "bert/encoder/layer_5/attention/self/MatMul" + input: "bert/encoder/layer_5/attention/self/Mul/y" + attr { 
+ key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/ExpandDims" + op: "ExpandDims" + input: "bert/encoder/mul" + input: "bert/encoder/layer_5/attention/self/ExpandDims/dim" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tdim" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/sub" + op: "Sub" + input: "bert/encoder/layer_5/attention/self/sub/x" + input: "bert/encoder/layer_5/attention/self/ExpandDims" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/mul_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: -10000.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/mul_1" + op: "Mul" + input: "bert/encoder/layer_5/attention/self/sub" + input: "bert/encoder/layer_5/attention/self/mul_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/add" + op: "Add" + input: "bert/encoder/layer_5/attention/self/Mul" + input: "bert/encoder/layer_5/attention/self/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/Softmax" + op: "Softmax" + input: "bert/encoder/layer_5/attention/self/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/dropout/rate" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + 
tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_5/attention/self/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_5/attention/self/dropout/random_uniform/max" + input: "bert/encoder/layer_5/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_5/attention/self/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_5/attention/self/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_5/attention/self/dropout/random_uniform/mul" + input: "bert/encoder/layer_5/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/dropout/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/dropout/sub" + op: "Sub" + input: 
"bert/encoder/layer_5/attention/self/dropout/sub/x" + input: "bert/encoder/layer_5/attention/self/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/dropout/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/dropout/truediv" + op: "RealDiv" + input: "bert/encoder/layer_5/attention/self/dropout/truediv/x" + input: "bert/encoder/layer_5/attention/self/dropout/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/dropout/GreaterEqual" + op: "GreaterEqual" + input: "bert/encoder/layer_5/attention/self/dropout/random_uniform" + input: "bert/encoder/layer_5/attention/self/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_5/attention/self/Softmax" + input: "bert/encoder/layer_5/attention/self/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/dropout/Cast" + op: "Cast" + input: "bert/encoder/layer_5/attention/self/dropout/GreaterEqual" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/dropout/mul_1" + op: "Mul" + input: "bert/encoder/layer_5/attention/self/dropout/mul" + input: "bert/encoder/layer_5/attention/self/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/Reshape_2" + op: "Reshape" + input: "bert/encoder/layer_5/attention/self/value/BiasAdd" + input: "bert/encoder/layer_5/attention/self/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + 
shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/transpose_2/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/transpose_2" + op: "Transpose" + input: "bert/encoder/layer_5/attention/self/Reshape_2" + input: "bert/encoder/layer_5/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/layer_5/attention/self/dropout/mul_1" + input: "bert/encoder/layer_5/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/transpose_3/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/transpose_3" + op: "Transpose" + input: "bert/encoder/layer_5/attention/self/MatMul_1" + input: "bert/encoder/layer_5/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/Reshape_3/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/Reshape_3" + op: "Reshape" + input: "bert/encoder/layer_5/attention/self/transpose_3" + input: "bert/encoder/layer_5/attention/self/Reshape_3/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + 
value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_5/attention/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_5/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_5/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_5/attention/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_5/attention/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel" + } + } + } + 
attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/kernel" + input: "bert/encoder/layer_5/attention/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/bias" + input: "bert/encoder/layer_5/attention/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias" + } + } + } + attr { + 
key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_5/attention/self/Reshape_3" + input: "bert/encoder/layer_5/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_5/attention/output/dense/MatMul" + input: "bert/encoder/layer_5/attention/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dropout/rate" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_5/attention/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_5/attention/output/dropout/random_uniform/max" + input: "bert/encoder/layer_5/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dropout/random_uniform/mul" + op: "Mul" + input: 
"bert/encoder/layer_5/attention/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_5/attention/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_5/attention/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_5/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dropout/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dropout/sub" + op: "Sub" + input: "bert/encoder/layer_5/attention/output/dropout/sub/x" + input: "bert/encoder/layer_5/attention/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dropout/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dropout/truediv" + op: "RealDiv" + input: "bert/encoder/layer_5/attention/output/dropout/truediv/x" + input: "bert/encoder/layer_5/attention/output/dropout/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dropout/GreaterEqual" + op: "GreaterEqual" + input: "bert/encoder/layer_5/attention/output/dropout/random_uniform" + input: "bert/encoder/layer_5/attention/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_5/attention/output/dense/BiasAdd" + input: "bert/encoder/layer_5/attention/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dropout/Cast" + op: "Cast" + input: "bert/encoder/layer_5/attention/output/dropout/GreaterEqual" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dropout/mul_1" + op: "Mul" + input: "bert/encoder/layer_5/attention/output/dropout/mul" + input: 
"bert/encoder/layer_5/attention/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/add" + op: "Add" + input: "bert/encoder/layer_5/attention/output/dropout/mul_1" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_5/attention/output/add" + input: "bert/encoder/layer_5/attention/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_5/attention/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_5/attention/output/add" + input: "bert/encoder/layer_5/attention/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + 
} + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_5/attention/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_5/attention/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_5/attention/output/add" + input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_5/attention/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/read" + input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: 
"bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_5/intermediate/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_5/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_5/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_5/intermediate/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_5/intermediate/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_5/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/kernel" + input: "bert/encoder/layer_5/intermediate/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_5/intermediate/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/bias/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/bias/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_5/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_5/intermediate/dense/bias/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/bias" + op: 
"VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/bias" + input: "bert/encoder/layer_5/intermediate/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_5/intermediate/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_5/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_5/intermediate/dense/MatMul" + input: "bert/encoder/layer_5/intermediate/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/Pow/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 3.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/Pow" + op: "Pow" + input: "bert/encoder/layer_5/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_5/intermediate/dense/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.044714998453855515 + } + } + } +} +node { + name: 
"bert/encoder/layer_5/intermediate/dense/mul" + op: "Mul" + input: "bert/encoder/layer_5/intermediate/dense/mul/x" + input: "bert/encoder/layer_5/intermediate/dense/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/add" + op: "Add" + input: "bert/encoder/layer_5/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_5/intermediate/dense/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/mul_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.7978845834732056 + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/mul_1" + op: "Mul" + input: "bert/encoder/layer_5/intermediate/dense/mul_1/x" + input: "bert/encoder/layer_5/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/Tanh" + op: "Tanh" + input: "bert/encoder/layer_5/intermediate/dense/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/add_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/add_1" + op: "Add" + input: "bert/encoder/layer_5/intermediate/dense/add_1/x" + input: "bert/encoder/layer_5/intermediate/dense/Tanh" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/mul_2/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/mul_2" + op: "Mul" + input: "bert/encoder/layer_5/intermediate/dense/mul_2/x" + input: "bert/encoder/layer_5/intermediate/dense/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/mul_3" + op: "Mul" + input: "bert/encoder/layer_5/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_5/intermediate/dense/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + 
} + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_5/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_5/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_5/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_5/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_5/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_5/output/dense/kernel" + input: "bert/encoder/layer_5/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_5/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_5/output/dense/bias" + input: "bert/encoder/layer_5/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_5/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/MatMul" + op: "MatMul" + input: 
"bert/encoder/layer_5/intermediate/dense/mul_3" + input: "bert/encoder/layer_5/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_5/output/dense/MatMul" + input: "bert/encoder/layer_5/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_5/output/dropout/rate" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_5/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_5/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_5/output/dropout/random_uniform/max" + input: "bert/encoder/layer_5/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_5/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_5/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: 
"bert/encoder/layer_5/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_5/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_5/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dropout/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dropout/sub" + op: "Sub" + input: "bert/encoder/layer_5/output/dropout/sub/x" + input: "bert/encoder/layer_5/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dropout/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dropout/truediv" + op: "RealDiv" + input: "bert/encoder/layer_5/output/dropout/truediv/x" + input: "bert/encoder/layer_5/output/dropout/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dropout/GreaterEqual" + op: "GreaterEqual" + input: "bert/encoder/layer_5/output/dropout/random_uniform" + input: "bert/encoder/layer_5/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_5/output/dense/BiasAdd" + input: "bert/encoder/layer_5/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dropout/Cast" + op: "Cast" + input: "bert/encoder/layer_5/output/dropout/GreaterEqual" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dropout/mul_1" + op: "Mul" + input: "bert/encoder/layer_5/output/dropout/mul" + input: "bert/encoder/layer_5/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/add" + op: "Add" + input: "bert/encoder/layer_5/output/dropout/mul_1" + input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim 
{ + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_5/output/LayerNorm/beta" + input: "bert/encoder/layer_5/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_5/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_5/output/LayerNorm/gamma" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_5/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_5/output/add" + input: "bert/encoder/layer_5/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_5/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_5/output/add" + input: "bert/encoder/layer_5/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_5/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } 
+ } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_5/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_5/output/add" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_5/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_5/output/LayerNorm/beta/read" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + 
} + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_6/attention/self/query/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_6/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_6/attention/self/query/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_6/attention/self/query/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_6/attention/self/query/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/kernel" + input: "bert/encoder/layer_6/attention/self/query/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_6/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/self/query/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/bias" + input: "bert/encoder/layer_6/attention/self/query/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/bias/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/self/query/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/MatMul" + op: "MatMul" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_6/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_6/attention/self/query/MatMul" + input: 
"bert/encoder/layer_6/attention/self/query/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_6/attention/self/key/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_6/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_6/attention/self/key/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_6/attention/self/key/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_6/attention/self/key/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_6/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/kernel" + input: "bert/encoder/layer_6/attention/self/key/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/self/key/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/bias" + input: "bert/encoder/layer_6/attention/self/key/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} 
+node { + name: "bert/encoder/layer_6/attention/self/key/bias/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/self/key/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/MatMul" + op: "MatMul" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_6/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_6/attention/self/key/MatMul" + input: "bert/encoder/layer_6/attention/self/key/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_6/attention/self/value/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 
+ } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_6/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_6/attention/self/value/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_6/attention/self/value/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_6/attention/self/value/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/value/kernel" + input: "bert/encoder/layer_6/attention/self/value/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/self/value/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: 
"bert/encoder/layer_6/attention/self/value/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/value/bias" + input: "bert/encoder/layer_6/attention/self/value/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/bias/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/self/value/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/MatMul" + op: "MatMul" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_6/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_6/attention/self/value/MatMul" + input: "bert/encoder/layer_6/attention/self/value/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/Reshape" + op: "Reshape" + input: "bert/encoder/layer_6/attention/self/query/BiasAdd" + input: "bert/encoder/layer_6/attention/self/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/transpose/perm" + op: "Const" + 
attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/transpose" + op: "Transpose" + input: "bert/encoder/layer_6/attention/self/Reshape" + input: "bert/encoder/layer_6/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/Reshape_1" + op: "Reshape" + input: "bert/encoder/layer_6/attention/self/key/BiasAdd" + input: "bert/encoder/layer_6/attention/self/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/transpose_1" + op: "Transpose" + input: "bert/encoder/layer_6/attention/self/Reshape_1" + input: "bert/encoder/layer_6/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/MatMul" + op: "BatchMatMulV2" + input: "bert/encoder/layer_6/attention/self/transpose" + input: "bert/encoder/layer_6/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/Mul/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + 
} + float_val: 0.125 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/Mul" + op: "Mul" + input: "bert/encoder/layer_6/attention/self/MatMul" + input: "bert/encoder/layer_6/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/ExpandDims" + op: "ExpandDims" + input: "bert/encoder/mul" + input: "bert/encoder/layer_6/attention/self/ExpandDims/dim" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tdim" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/sub" + op: "Sub" + input: "bert/encoder/layer_6/attention/self/sub/x" + input: "bert/encoder/layer_6/attention/self/ExpandDims" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/mul_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: -10000.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/mul_1" + op: "Mul" + input: "bert/encoder/layer_6/attention/self/sub" + input: "bert/encoder/layer_6/attention/self/mul_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/add" + op: "Add" + input: "bert/encoder/layer_6/attention/self/Mul" + input: "bert/encoder/layer_6/attention/self/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/Softmax" + op: "Softmax" + input: "bert/encoder/layer_6/attention/self/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: 
"bert/encoder/layer_6/attention/self/dropout/rate" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_6/attention/self/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_6/attention/self/dropout/random_uniform/max" + input: "bert/encoder/layer_6/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_6/attention/self/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_6/attention/self/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_6/attention/self/dropout/random_uniform/mul" + input: "bert/encoder/layer_6/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/dropout/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + 
type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/dropout/sub" + op: "Sub" + input: "bert/encoder/layer_6/attention/self/dropout/sub/x" + input: "bert/encoder/layer_6/attention/self/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/dropout/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/dropout/truediv" + op: "RealDiv" + input: "bert/encoder/layer_6/attention/self/dropout/truediv/x" + input: "bert/encoder/layer_6/attention/self/dropout/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/dropout/GreaterEqual" + op: "GreaterEqual" + input: "bert/encoder/layer_6/attention/self/dropout/random_uniform" + input: "bert/encoder/layer_6/attention/self/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_6/attention/self/Softmax" + input: "bert/encoder/layer_6/attention/self/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/dropout/Cast" + op: "Cast" + input: "bert/encoder/layer_6/attention/self/dropout/GreaterEqual" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/dropout/mul_1" + op: "Mul" + input: "bert/encoder/layer_6/attention/self/dropout/mul" + input: "bert/encoder/layer_6/attention/self/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/Reshape_2" + op: "Reshape" + input: "bert/encoder/layer_6/attention/self/value/BiasAdd" + 
input: "bert/encoder/layer_6/attention/self/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/transpose_2/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/transpose_2" + op: "Transpose" + input: "bert/encoder/layer_6/attention/self/Reshape_2" + input: "bert/encoder/layer_6/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/layer_6/attention/self/dropout/mul_1" + input: "bert/encoder/layer_6/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/transpose_3/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/transpose_3" + op: "Transpose" + input: "bert/encoder/layer_6/attention/self/MatMul_1" + input: "bert/encoder/layer_6/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/Reshape_3/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/Reshape_3" + op: "Reshape" + input: "bert/encoder/layer_6/attention/self/transpose_3" + input: "bert/encoder/layer_6/attention/self/Reshape_3/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + 
list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_6/attention/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_6/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_6/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_6/attention/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_6/attention/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + 
} + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/kernel" + input: "bert/encoder/layer_6/attention/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/bias" + input: "bert/encoder/layer_6/attention/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/bias/read" + op: "Identity" + input: 
"bert/encoder/layer_6/attention/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_6/attention/self/Reshape_3" + input: "bert/encoder/layer_6/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_6/attention/output/dense/MatMul" + input: "bert/encoder/layer_6/attention/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dropout/rate" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_6/attention/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_6/attention/output/dropout/random_uniform/max" + input: "bert/encoder/layer_6/attention/output/dropout/random_uniform/min" + attr { + key: "T" + 
value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_6/attention/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_6/attention/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_6/attention/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_6/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dropout/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dropout/sub" + op: "Sub" + input: "bert/encoder/layer_6/attention/output/dropout/sub/x" + input: "bert/encoder/layer_6/attention/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dropout/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dropout/truediv" + op: "RealDiv" + input: "bert/encoder/layer_6/attention/output/dropout/truediv/x" + input: "bert/encoder/layer_6/attention/output/dropout/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dropout/GreaterEqual" + op: "GreaterEqual" + input: "bert/encoder/layer_6/attention/output/dropout/random_uniform" + input: "bert/encoder/layer_6/attention/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_6/attention/output/dense/BiasAdd" + input: "bert/encoder/layer_6/attention/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dropout/Cast" + op: "Cast" + input: "bert/encoder/layer_6/attention/output/dropout/GreaterEqual" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + 
dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dropout/mul_1" + op: "Mul" + input: "bert/encoder/layer_6/attention/output/dropout/mul" + input: "bert/encoder/layer_6/attention/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/add" + op: "Add" + input: "bert/encoder/layer_6/attention/output/dropout/mul_1" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: 
"bert/encoder/layer_6/attention/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_6/attention/output/add" + input: "bert/encoder/layer_6/attention/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_6/attention/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_6/attention/output/add" + input: "bert/encoder/layer_6/attention/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { 
+ size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_6/attention/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_6/attention/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_6/attention/output/add" + input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_6/attention/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/read" + input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + 
size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_6/intermediate/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_6/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_6/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_6/intermediate/dense/kernel/Initializer/truncated_normal/mul" + input: 
"bert/encoder/layer_6/intermediate/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_6/intermediate/dense/kernel" + input: "bert/encoder/layer_6/intermediate/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_6/intermediate/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/bias/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/bias/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_6/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_6/intermediate/dense/bias/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + 
shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_6/intermediate/dense/bias" + input: "bert/encoder/layer_6/intermediate/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_6/intermediate/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_6/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_6/intermediate/dense/MatMul" + input: "bert/encoder/layer_6/intermediate/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/Pow/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 3.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/Pow" + op: "Pow" + input: "bert/encoder/layer_6/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_6/intermediate/dense/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value 
{ + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.044714998453855515 + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/mul" + op: "Mul" + input: "bert/encoder/layer_6/intermediate/dense/mul/x" + input: "bert/encoder/layer_6/intermediate/dense/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/add" + op: "Add" + input: "bert/encoder/layer_6/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_6/intermediate/dense/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/mul_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.7978845834732056 + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/mul_1" + op: "Mul" + input: "bert/encoder/layer_6/intermediate/dense/mul_1/x" + input: "bert/encoder/layer_6/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/Tanh" + op: "Tanh" + input: "bert/encoder/layer_6/intermediate/dense/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/add_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/add_1" + op: "Add" + input: "bert/encoder/layer_6/intermediate/dense/add_1/x" + input: "bert/encoder/layer_6/intermediate/dense/Tanh" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/mul_2/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/mul_2" + op: "Mul" + input: "bert/encoder/layer_6/intermediate/dense/mul_2/x" + input: "bert/encoder/layer_6/intermediate/dense/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/mul_3" + op: "Mul" + input: "bert/encoder/layer_6/intermediate/dense/BiasAdd" + input: 
"bert/encoder/layer_6/intermediate/dense/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_6/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_6/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_6/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_6/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_6/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} 
+node { + name: "bert/encoder/layer_6/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/kernel" + input: "bert/encoder/layer_6/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_6/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/bias" + input: "bert/encoder/layer_6/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_6/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias" + } + } + } + 
attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_6/intermediate/dense/mul_3" + input: "bert/encoder/layer_6/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_6/output/dense/MatMul" + input: "bert/encoder/layer_6/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_6/output/dropout/rate" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_6/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_6/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_6/output/dropout/random_uniform/max" + input: "bert/encoder/layer_6/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_6/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_6/output/dropout/random_uniform/sub" + attr { + key: "T" + 
value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_6/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_6/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dropout/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dropout/sub" + op: "Sub" + input: "bert/encoder/layer_6/output/dropout/sub/x" + input: "bert/encoder/layer_6/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dropout/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dropout/truediv" + op: "RealDiv" + input: "bert/encoder/layer_6/output/dropout/truediv/x" + input: "bert/encoder/layer_6/output/dropout/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dropout/GreaterEqual" + op: "GreaterEqual" + input: "bert/encoder/layer_6/output/dropout/random_uniform" + input: "bert/encoder/layer_6/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_6/output/dense/BiasAdd" + input: "bert/encoder/layer_6/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dropout/Cast" + op: "Cast" + input: "bert/encoder/layer_6/output/dropout/GreaterEqual" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dropout/mul_1" + op: "Mul" + input: "bert/encoder/layer_6/output/dropout/mul" + input: "bert/encoder/layer_6/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/add" + op: "Add" + input: "bert/encoder/layer_6/output/dropout/mul_1" + input: 
"bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/beta" + input: "bert/encoder/layer_6/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_6/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/gamma" + input: 
"bert/encoder/layer_6/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_6/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_6/output/add" + input: "bert/encoder/layer_6/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_6/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_6/output/add" + input: "bert/encoder/layer_6/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_6/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: 
"_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_6/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_6/output/add" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_6/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_6/output/LayerNorm/beta/read" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: 
"bert/encoder/layer_7/attention/self/query/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_7/attention/self/query/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_7/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_7/attention/self/query/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_7/attention/self/query/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_7/attention/self/query/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel/Assign" + 
op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/kernel" + input: "bert/encoder/layer_7/attention/self/query/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/self/query/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/bias" + input: "bert/encoder/layer_7/attention/self/query/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/bias/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/self/query/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/MatMul" + op: "MatMul" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_7/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: 
"transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_7/attention/self/query/MatMul" + input: "bert/encoder/layer_7/attention/self/query/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_7/attention/self/key/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_7/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_7/attention/self/key/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel/Initializer/truncated_normal" + op: "Add" + input: 
"bert/encoder/layer_7/attention/self/key/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_7/attention/self/key/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/kernel" + input: "bert/encoder/layer_7/attention/self/key/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/self/key/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/bias" + input: "bert/encoder/layer_7/attention/self/key/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_7/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/bias/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/self/key/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/MatMul" + op: "MatMul" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_7/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_7/attention/self/key/MatMul" + input: "bert/encoder/layer_7/attention/self/key/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_7/attention/self/value/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_7/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_7/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_7/attention/self/value/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_7/attention/self/value/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_7/attention/self/value/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/kernel" + input: "bert/encoder/layer_7/attention/self/value/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/self/value/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + 
dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/bias" + input: "bert/encoder/layer_7/attention/self/value/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/bias/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/self/value/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/MatMul" + op: "MatMul" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_7/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_7/attention/self/value/MatMul" + input: "bert/encoder/layer_7/attention/self/value/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/Reshape" + op: "Reshape" + input: "bert/encoder/layer_7/attention/self/query/BiasAdd" + input: "bert/encoder/layer_7/attention/self/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr 
{ + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/transpose" + op: "Transpose" + input: "bert/encoder/layer_7/attention/self/Reshape" + input: "bert/encoder/layer_7/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/Reshape_1" + op: "Reshape" + input: "bert/encoder/layer_7/attention/self/key/BiasAdd" + input: "bert/encoder/layer_7/attention/self/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/transpose_1" + op: "Transpose" + input: "bert/encoder/layer_7/attention/self/Reshape_1" + input: "bert/encoder/layer_7/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/MatMul" + op: "BatchMatMulV2" + input: "bert/encoder/layer_7/attention/self/transpose" + input: "bert/encoder/layer_7/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: 
"bert/encoder/layer_7/attention/self/Mul/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.125 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/Mul" + op: "Mul" + input: "bert/encoder/layer_7/attention/self/MatMul" + input: "bert/encoder/layer_7/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/ExpandDims" + op: "ExpandDims" + input: "bert/encoder/mul" + input: "bert/encoder/layer_7/attention/self/ExpandDims/dim" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tdim" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/sub" + op: "Sub" + input: "bert/encoder/layer_7/attention/self/sub/x" + input: "bert/encoder/layer_7/attention/self/ExpandDims" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/mul_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: -10000.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/mul_1" + op: "Mul" + input: "bert/encoder/layer_7/attention/self/sub" + input: "bert/encoder/layer_7/attention/self/mul_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/add" + op: "Add" + input: "bert/encoder/layer_7/attention/self/Mul" + input: "bert/encoder/layer_7/attention/self/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/Softmax" + op: "Softmax" + input: 
"bert/encoder/layer_7/attention/self/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/dropout/rate" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_7/attention/self/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_7/attention/self/dropout/random_uniform/max" + input: "bert/encoder/layer_7/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_7/attention/self/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_7/attention/self/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_7/attention/self/dropout/random_uniform/mul" + input: "bert/encoder/layer_7/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + 
} + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/dropout/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/dropout/sub" + op: "Sub" + input: "bert/encoder/layer_7/attention/self/dropout/sub/x" + input: "bert/encoder/layer_7/attention/self/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/dropout/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/dropout/truediv" + op: "RealDiv" + input: "bert/encoder/layer_7/attention/self/dropout/truediv/x" + input: "bert/encoder/layer_7/attention/self/dropout/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/dropout/GreaterEqual" + op: "GreaterEqual" + input: "bert/encoder/layer_7/attention/self/dropout/random_uniform" + input: "bert/encoder/layer_7/attention/self/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_7/attention/self/Softmax" + input: "bert/encoder/layer_7/attention/self/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/dropout/Cast" + op: "Cast" + input: "bert/encoder/layer_7/attention/self/dropout/GreaterEqual" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/dropout/mul_1" + op: "Mul" + input: "bert/encoder/layer_7/attention/self/dropout/mul" + input: "bert/encoder/layer_7/attention/self/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: 
DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/Reshape_2" + op: "Reshape" + input: "bert/encoder/layer_7/attention/self/value/BiasAdd" + input: "bert/encoder/layer_7/attention/self/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/transpose_2/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/transpose_2" + op: "Transpose" + input: "bert/encoder/layer_7/attention/self/Reshape_2" + input: "bert/encoder/layer_7/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/layer_7/attention/self/dropout/mul_1" + input: "bert/encoder/layer_7/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/transpose_3/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/transpose_3" + op: "Transpose" + input: "bert/encoder/layer_7/attention/self/MatMul_1" + input: "bert/encoder/layer_7/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/Reshape_3/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/Reshape_3" + op: 
"Reshape" + input: "bert/encoder/layer_7/attention/self/transpose_3" + input: "bert/encoder/layer_7/attention/self/Reshape_3/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_7/attention/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_7/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_7/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_7/attention/output/dense/kernel/Initializer/truncated_normal/mul" + input: 
"bert/encoder/layer_7/attention/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/kernel" + input: "bert/encoder/layer_7/attention/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/bias" + input: "bert/encoder/layer_7/attention/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_7/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_7/attention/self/Reshape_3" + input: "bert/encoder/layer_7/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_7/attention/output/dense/MatMul" + input: "bert/encoder/layer_7/attention/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dropout/rate" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_7/attention/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + 
type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_7/attention/output/dropout/random_uniform/max" + input: "bert/encoder/layer_7/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_7/attention/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_7/attention/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_7/attention/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_7/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dropout/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dropout/sub" + op: "Sub" + input: "bert/encoder/layer_7/attention/output/dropout/sub/x" + input: "bert/encoder/layer_7/attention/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dropout/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dropout/truediv" + op: "RealDiv" + input: "bert/encoder/layer_7/attention/output/dropout/truediv/x" + input: "bert/encoder/layer_7/attention/output/dropout/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dropout/GreaterEqual" + op: "GreaterEqual" + input: "bert/encoder/layer_7/attention/output/dropout/random_uniform" + input: "bert/encoder/layer_7/attention/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_7/attention/output/dense/BiasAdd" + input: "bert/encoder/layer_7/attention/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } 
+} +node { + name: "bert/encoder/layer_7/attention/output/dropout/Cast" + op: "Cast" + input: "bert/encoder/layer_7/attention/output/dropout/GreaterEqual" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dropout/mul_1" + op: "Mul" + input: "bert/encoder/layer_7/attention/output/dropout/mul" + input: "bert/encoder/layer_7/attention/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/add" + op: "Add" + input: "bert/encoder/layer_7/attention/output/dropout/mul_1" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_7/attention/output/add" + input: "bert/encoder/layer_7/attention/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_7/attention/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_7/attention/output/add" + input: 
"bert/encoder/layer_7/attention/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_7/attention/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_7/attention/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_7/attention/output/add" + input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_7/attention/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { 
+ shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/read" + input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_7/intermediate/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_7/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_7/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + 
list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_7/intermediate/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_7/intermediate/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/kernel" + input: "bert/encoder/layer_7/intermediate/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_7/intermediate/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/bias/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: 
"bert/encoder/layer_7/intermediate/dense/bias/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_7/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_7/intermediate/dense/bias/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/bias" + input: "bert/encoder/layer_7/intermediate/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_7/intermediate/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_7/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_7/intermediate/dense/MatMul" + input: "bert/encoder/layer_7/intermediate/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/Pow/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 3.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/Pow" + op: "Pow" + input: 
"bert/encoder/layer_7/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_7/intermediate/dense/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.044714998453855515 + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/mul" + op: "Mul" + input: "bert/encoder/layer_7/intermediate/dense/mul/x" + input: "bert/encoder/layer_7/intermediate/dense/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/add" + op: "Add" + input: "bert/encoder/layer_7/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_7/intermediate/dense/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/mul_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.7978845834732056 + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/mul_1" + op: "Mul" + input: "bert/encoder/layer_7/intermediate/dense/mul_1/x" + input: "bert/encoder/layer_7/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/Tanh" + op: "Tanh" + input: "bert/encoder/layer_7/intermediate/dense/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/add_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/add_1" + op: "Add" + input: "bert/encoder/layer_7/intermediate/dense/add_1/x" + input: "bert/encoder/layer_7/intermediate/dense/Tanh" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/mul_2/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/mul_2" + op: "Mul" + 
input: "bert/encoder/layer_7/intermediate/dense/mul_2/x" + input: "bert/encoder/layer_7/intermediate/dense/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/mul_3" + op: "Mul" + input: "bert/encoder/layer_7/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_7/intermediate/dense/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_7/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_7/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_7/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: 
"bert/encoder/layer_7/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_7/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/kernel" + input: "bert/encoder/layer_7/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_7/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/bias" + input: "bert/encoder/layer_7/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape 
{ + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_7/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_7/intermediate/dense/mul_3" + input: "bert/encoder/layer_7/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_7/output/dense/MatMul" + input: "bert/encoder/layer_7/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_7/output/dropout/rate" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_7/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_7/output/dropout/random_uniform/sub" + op: "Sub" + input: 
"bert/encoder/layer_7/output/dropout/random_uniform/max" + input: "bert/encoder/layer_7/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_7/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_7/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_7/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_7/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dropout/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dropout/sub" + op: "Sub" + input: "bert/encoder/layer_7/output/dropout/sub/x" + input: "bert/encoder/layer_7/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dropout/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dropout/truediv" + op: "RealDiv" + input: "bert/encoder/layer_7/output/dropout/truediv/x" + input: "bert/encoder/layer_7/output/dropout/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dropout/GreaterEqual" + op: "GreaterEqual" + input: "bert/encoder/layer_7/output/dropout/random_uniform" + input: "bert/encoder/layer_7/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_7/output/dense/BiasAdd" + input: "bert/encoder/layer_7/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dropout/Cast" + op: "Cast" + input: "bert/encoder/layer_7/output/dropout/GreaterEqual" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + 
name: "bert/encoder/layer_7/output/dropout/mul_1" + op: "Mul" + input: "bert/encoder/layer_7/output/dropout/mul" + input: "bert/encoder/layer_7/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/add" + op: "Add" + input: "bert/encoder/layer_7/output/dropout/mul_1" + input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_7/output/LayerNorm/beta" + input: "bert/encoder/layer_7/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_7/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + 
size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_7/output/LayerNorm/gamma" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_7/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_7/output/add" + input: "bert/encoder/layer_7/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_7/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_7/output/add" + input: "bert/encoder/layer_7/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference" + input: 
"bert/encoder/layer_7/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_7/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_7/output/add" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_7/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_7/output/LayerNorm/beta/read" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value 
{ + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_8/attention/self/query/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_8/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_8/attention/self/query/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_8/attention/self/query/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_8/attention/self/query/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + 
list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/kernel" + input: "bert/encoder/layer_8/attention/self/query/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/self/query/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/bias" + input: "bert/encoder/layer_8/attention/self/query/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/bias/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/self/query/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } 
+ } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/MatMul" + op: "MatMul" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_8/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_8/attention/self/query/MatMul" + input: "bert/encoder/layer_8/attention/self/query/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_8/attention/self/key/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_8/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_8/attention/self/key/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: 
DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_8/attention/self/key/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_8/attention/self/key/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/kernel" + input: "bert/encoder/layer_8/attention/self/key/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/self/key/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + 
key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/bias" + input: "bert/encoder/layer_8/attention/self/key/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/bias/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/self/key/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/MatMul" + op: "MatMul" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_8/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_8/attention/self/key/MatMul" + input: "bert/encoder/layer_8/attention/self/key/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 
0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_8/attention/self/value/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_8/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_8/attention/self/value/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_8/attention/self/value/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_8/attention/self/value/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/value/kernel" + input: "bert/encoder/layer_8/attention/self/value/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/self/value/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + 
value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/value/bias" + input: "bert/encoder/layer_8/attention/self/value/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/bias/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/self/value/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/MatMul" + op: "MatMul" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_8/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_8/attention/self/value/MatMul" + input: "bert/encoder/layer_8/attention/self/value/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " 
\000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/Reshape" + op: "Reshape" + input: "bert/encoder/layer_8/attention/self/query/BiasAdd" + input: "bert/encoder/layer_8/attention/self/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/transpose" + op: "Transpose" + input: "bert/encoder/layer_8/attention/self/Reshape" + input: "bert/encoder/layer_8/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/Reshape_1" + op: "Reshape" + input: "bert/encoder/layer_8/attention/self/key/BiasAdd" + input: "bert/encoder/layer_8/attention/self/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/transpose_1" + op: "Transpose" + input: "bert/encoder/layer_8/attention/self/Reshape_1" + input: "bert/encoder/layer_8/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/MatMul" + op: "BatchMatMulV2" + input: "bert/encoder/layer_8/attention/self/transpose" + input: 
"bert/encoder/layer_8/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/Mul/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.125 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/Mul" + op: "Mul" + input: "bert/encoder/layer_8/attention/self/MatMul" + input: "bert/encoder/layer_8/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/ExpandDims" + op: "ExpandDims" + input: "bert/encoder/mul" + input: "bert/encoder/layer_8/attention/self/ExpandDims/dim" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tdim" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/sub" + op: "Sub" + input: "bert/encoder/layer_8/attention/self/sub/x" + input: "bert/encoder/layer_8/attention/self/ExpandDims" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/mul_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: -10000.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/mul_1" + op: "Mul" + input: "bert/encoder/layer_8/attention/self/sub" + input: "bert/encoder/layer_8/attention/self/mul_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/add" + op: "Add" + input: 
"bert/encoder/layer_8/attention/self/Mul" + input: "bert/encoder/layer_8/attention/self/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/Softmax" + op: "Softmax" + input: "bert/encoder/layer_8/attention/self/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/dropout/rate" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_8/attention/self/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_8/attention/self/dropout/random_uniform/max" + input: "bert/encoder/layer_8/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_8/attention/self/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_8/attention/self/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim 
{ + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_8/attention/self/dropout/random_uniform/mul" + input: "bert/encoder/layer_8/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/dropout/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/dropout/sub" + op: "Sub" + input: "bert/encoder/layer_8/attention/self/dropout/sub/x" + input: "bert/encoder/layer_8/attention/self/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/dropout/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/dropout/truediv" + op: "RealDiv" + input: "bert/encoder/layer_8/attention/self/dropout/truediv/x" + input: "bert/encoder/layer_8/attention/self/dropout/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/dropout/GreaterEqual" + op: "GreaterEqual" + input: "bert/encoder/layer_8/attention/self/dropout/random_uniform" + input: "bert/encoder/layer_8/attention/self/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_8/attention/self/Softmax" + input: "bert/encoder/layer_8/attention/self/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/dropout/Cast" + op: "Cast" + input: "bert/encoder/layer_8/attention/self/dropout/GreaterEqual" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/dropout/mul_1" + op: "Mul" + input: "bert/encoder/layer_8/attention/self/dropout/mul" + input: "bert/encoder/layer_8/attention/self/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + 
dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/Reshape_2" + op: "Reshape" + input: "bert/encoder/layer_8/attention/self/value/BiasAdd" + input: "bert/encoder/layer_8/attention/self/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/transpose_2/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/transpose_2" + op: "Transpose" + input: "bert/encoder/layer_8/attention/self/Reshape_2" + input: "bert/encoder/layer_8/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/layer_8/attention/self/dropout/mul_1" + input: "bert/encoder/layer_8/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/transpose_3/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/transpose_3" + op: "Transpose" + input: "bert/encoder/layer_8/attention/self/MatMul_1" + input: "bert/encoder/layer_8/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/Reshape_3/shape" + op: 
"Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/Reshape_3" + op: "Reshape" + input: "bert/encoder/layer_8/attention/self/transpose_3" + input: "bert/encoder/layer_8/attention/self/Reshape_3/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_8/attention/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_8/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_8/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel" + } + } + } + attr { + 
key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_8/attention/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_8/attention/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/kernel" + input: "bert/encoder/layer_8/attention/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: 
"bert/encoder/layer_8/attention/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/bias" + input: "bert/encoder/layer_8/attention/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_8/attention/self/Reshape_3" + input: "bert/encoder/layer_8/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_8/attention/output/dense/MatMul" + input: "bert/encoder/layer_8/attention/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dropout/rate" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: 
"bert/encoder/layer_8/attention/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_8/attention/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_8/attention/output/dropout/random_uniform/max" + input: "bert/encoder/layer_8/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_8/attention/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_8/attention/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_8/attention/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_8/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dropout/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dropout/sub" + op: "Sub" + input: "bert/encoder/layer_8/attention/output/dropout/sub/x" + input: "bert/encoder/layer_8/attention/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dropout/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dropout/truediv" + op: "RealDiv" + input: "bert/encoder/layer_8/attention/output/dropout/truediv/x" + input: "bert/encoder/layer_8/attention/output/dropout/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dropout/GreaterEqual" + op: "GreaterEqual" + input: "bert/encoder/layer_8/attention/output/dropout/random_uniform" + input: "bert/encoder/layer_8/attention/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + 
name: "bert/encoder/layer_8/attention/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_8/attention/output/dense/BiasAdd" + input: "bert/encoder/layer_8/attention/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dropout/Cast" + op: "Cast" + input: "bert/encoder/layer_8/attention/output/dropout/GreaterEqual" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dropout/mul_1" + op: "Mul" + input: "bert/encoder/layer_8/attention/output/dropout/mul" + input: "bert/encoder/layer_8/attention/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/add" + op: "Add" + input: "bert/encoder/layer_8/attention/output/dropout/mul_1" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + 
attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_8/attention/output/add" + input: "bert/encoder/layer_8/attention/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_8/attention/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: 
DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_8/attention/output/add" + input: "bert/encoder/layer_8/attention/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_8/attention/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_8/attention/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_8/attention/output/add" + input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: 
"bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_8/attention/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/read" + input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_8/intermediate/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: 
"bert/encoder/layer_8/intermediate/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_8/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_8/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_8/intermediate/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_8/intermediate/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/kernel" + input: "bert/encoder/layer_8/intermediate/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_8/intermediate/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/bias/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_8/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/bias/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_8/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_8/intermediate/dense/bias/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/bias" + input: "bert/encoder/layer_8/intermediate/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_8/intermediate/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_8/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_8/intermediate/dense/MatMul" + input: "bert/encoder/layer_8/intermediate/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/Pow/y" + op: "Const" + attr { + 
key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 3.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/Pow" + op: "Pow" + input: "bert/encoder/layer_8/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_8/intermediate/dense/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.044714998453855515 + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/mul" + op: "Mul" + input: "bert/encoder/layer_8/intermediate/dense/mul/x" + input: "bert/encoder/layer_8/intermediate/dense/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/add" + op: "Add" + input: "bert/encoder/layer_8/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_8/intermediate/dense/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/mul_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.7978845834732056 + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/mul_1" + op: "Mul" + input: "bert/encoder/layer_8/intermediate/dense/mul_1/x" + input: "bert/encoder/layer_8/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/Tanh" + op: "Tanh" + input: "bert/encoder/layer_8/intermediate/dense/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/add_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/add_1" + op: "Add" + input: "bert/encoder/layer_8/intermediate/dense/add_1/x" + input: "bert/encoder/layer_8/intermediate/dense/Tanh" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/mul_2/x" + op: "Const" + attr { 
+ key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/mul_2" + op: "Mul" + input: "bert/encoder/layer_8/intermediate/dense/mul_2/x" + input: "bert/encoder/layer_8/intermediate/dense/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/mul_3" + op: "Mul" + input: "bert/encoder/layer_8/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_8/intermediate/dense/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_8/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_8/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_8/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + 
list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_8/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_8/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_8/output/dense/kernel" + input: "bert/encoder/layer_8/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_8/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/bias/Assign" + op: "Assign" + input: 
"bert/encoder/layer_8/output/dense/bias" + input: "bert/encoder/layer_8/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_8/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_8/intermediate/dense/mul_3" + input: "bert/encoder/layer_8/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_8/output/dense/MatMul" + input: "bert/encoder/layer_8/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_8/output/dropout/rate" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_8/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 
768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_8/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_8/output/dropout/random_uniform/max" + input: "bert/encoder/layer_8/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_8/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_8/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_8/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_8/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dropout/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dropout/sub" + op: "Sub" + input: "bert/encoder/layer_8/output/dropout/sub/x" + input: "bert/encoder/layer_8/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dropout/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dropout/truediv" + op: "RealDiv" + input: "bert/encoder/layer_8/output/dropout/truediv/x" + input: "bert/encoder/layer_8/output/dropout/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dropout/GreaterEqual" + op: "GreaterEqual" + input: "bert/encoder/layer_8/output/dropout/random_uniform" + input: "bert/encoder/layer_8/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_8/output/dense/BiasAdd" + input: "bert/encoder/layer_8/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dropout/Cast" + op: "Cast" + input: "bert/encoder/layer_8/output/dropout/GreaterEqual" + attr { + key: "DstT" + value { + 
type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dropout/mul_1" + op: "Mul" + input: "bert/encoder/layer_8/output/dropout/mul" + input: "bert/encoder/layer_8/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/add" + op: "Add" + input: "bert/encoder/layer_8/output/dropout/mul_1" + input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/beta" + input: "bert/encoder/layer_8/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_8/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + 
} + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/gamma" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_8/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_8/output/add" + input: "bert/encoder/layer_8/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_8/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_8/output/add" + input: "bert/encoder/layer_8/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + 
tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_8/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_8/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_8/output/add" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_8/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_8/output/LayerNorm/beta/read" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + 
value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_9/attention/self/query/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_9/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_9/attention/self/query/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_9/attention/self/query/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_9/attention/self/query/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node 
{ + name: "bert/encoder/layer_9/attention/self/query/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/kernel" + input: "bert/encoder/layer_9/attention/self/query/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/self/query/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/bias" + input: "bert/encoder/layer_9/attention/self/query/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/bias/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/self/query/bias" + attr { + key: "T" + 
value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/MatMul" + op: "MatMul" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_9/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_9/attention/self/query/MatMul" + input: "bert/encoder/layer_9/attention/self/query/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_9/attention/self/key/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel/Initializer/truncated_normal/mul" + op: "Mul" 
+ input: "bert/encoder/layer_9/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_9/attention/self/key/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_9/attention/self/key/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_9/attention/self/key/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/key/kernel" + input: "bert/encoder/layer_9/attention/self/key/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/self/key/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + 
size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/key/bias" + input: "bert/encoder/layer_9/attention/self/key/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/bias/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/self/key/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/MatMul" + op: "MatMul" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_9/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_9/attention/self/key/MatMul" + input: "bert/encoder/layer_9/attention/self/key/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel" + } + } + } + attr { 
+ key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_9/attention/self/value/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_9/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_9/attention/self/value/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_9/attention/self/value/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_9/attention/self/value/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/value/kernel" + input: "bert/encoder/layer_9/attention/self/value/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel/read" + op: "Identity" + input: 
"bert/encoder/layer_9/attention/self/value/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/value/bias" + input: "bert/encoder/layer_9/attention/self/value/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/bias/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/self/value/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/MatMul" + op: "MatMul" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_9/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_9/attention/self/value/MatMul" + input: "bert/encoder/layer_9/attention/self/value/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { 
+ shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/Reshape" + op: "Reshape" + input: "bert/encoder/layer_9/attention/self/query/BiasAdd" + input: "bert/encoder/layer_9/attention/self/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/transpose" + op: "Transpose" + input: "bert/encoder/layer_9/attention/self/Reshape" + input: "bert/encoder/layer_9/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/Reshape_1" + op: "Reshape" + input: "bert/encoder/layer_9/attention/self/key/BiasAdd" + input: "bert/encoder/layer_9/attention/self/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/transpose_1" + op: "Transpose" + input: "bert/encoder/layer_9/attention/self/Reshape_1" + input: "bert/encoder/layer_9/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + 
size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/MatMul" + op: "BatchMatMulV2" + input: "bert/encoder/layer_9/attention/self/transpose" + input: "bert/encoder/layer_9/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/Mul/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.125 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/Mul" + op: "Mul" + input: "bert/encoder/layer_9/attention/self/MatMul" + input: "bert/encoder/layer_9/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/ExpandDims" + op: "ExpandDims" + input: "bert/encoder/mul" + input: "bert/encoder/layer_9/attention/self/ExpandDims/dim" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tdim" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/sub" + op: "Sub" + input: "bert/encoder/layer_9/attention/self/sub/x" + input: "bert/encoder/layer_9/attention/self/ExpandDims" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/mul_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: -10000.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/mul_1" + op: "Mul" + input: "bert/encoder/layer_9/attention/self/sub" + input: "bert/encoder/layer_9/attention/self/mul_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + 
dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/add" + op: "Add" + input: "bert/encoder/layer_9/attention/self/Mul" + input: "bert/encoder/layer_9/attention/self/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/Softmax" + op: "Softmax" + input: "bert/encoder/layer_9/attention/self/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/dropout/rate" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_9/attention/self/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_9/attention/self/dropout/random_uniform/max" + input: "bert/encoder/layer_9/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_9/attention/self/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_9/attention/self/dropout/random_uniform/sub" + attr { + key: "T" + 
value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_9/attention/self/dropout/random_uniform/mul" + input: "bert/encoder/layer_9/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/dropout/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/dropout/sub" + op: "Sub" + input: "bert/encoder/layer_9/attention/self/dropout/sub/x" + input: "bert/encoder/layer_9/attention/self/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/dropout/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/dropout/truediv" + op: "RealDiv" + input: "bert/encoder/layer_9/attention/self/dropout/truediv/x" + input: "bert/encoder/layer_9/attention/self/dropout/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/dropout/GreaterEqual" + op: "GreaterEqual" + input: "bert/encoder/layer_9/attention/self/dropout/random_uniform" + input: "bert/encoder/layer_9/attention/self/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_9/attention/self/Softmax" + input: "bert/encoder/layer_9/attention/self/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/dropout/Cast" + op: "Cast" + input: "bert/encoder/layer_9/attention/self/dropout/GreaterEqual" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/dropout/mul_1" + op: "Mul" + input: "bert/encoder/layer_9/attention/self/dropout/mul" + input: 
"bert/encoder/layer_9/attention/self/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/Reshape_2" + op: "Reshape" + input: "bert/encoder/layer_9/attention/self/value/BiasAdd" + input: "bert/encoder/layer_9/attention/self/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/transpose_2/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/transpose_2" + op: "Transpose" + input: "bert/encoder/layer_9/attention/self/Reshape_2" + input: "bert/encoder/layer_9/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/layer_9/attention/self/dropout/mul_1" + input: "bert/encoder/layer_9/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/transpose_3/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/transpose_3" + op: "Transpose" + input: "bert/encoder/layer_9/attention/self/MatMul_1" + input: "bert/encoder/layer_9/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + 
dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/Reshape_3/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/Reshape_3" + op: "Reshape" + input: "bert/encoder/layer_9/attention/self/transpose_3" + input: "bert/encoder/layer_9/attention/self/Reshape_3/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_9/attention/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_9/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_9/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + 
value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_9/attention/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_9/attention/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/dense/kernel" + input: "bert/encoder/layer_9/attention/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + 
attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/dense/bias" + input: "bert/encoder/layer_9/attention/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_9/attention/self/Reshape_3" + input: "bert/encoder/layer_9/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_9/attention/output/dense/MatMul" + input: "bert/encoder/layer_9/attention/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dropout/rate" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + 
dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_9/attention/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_9/attention/output/dropout/random_uniform/max" + input: "bert/encoder/layer_9/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_9/attention/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_9/attention/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_9/attention/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_9/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dropout/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dropout/sub" + op: "Sub" + input: "bert/encoder/layer_9/attention/output/dropout/sub/x" + input: "bert/encoder/layer_9/attention/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dropout/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dropout/truediv" + op: "RealDiv" + input: "bert/encoder/layer_9/attention/output/dropout/truediv/x" + input: "bert/encoder/layer_9/attention/output/dropout/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dropout/GreaterEqual" + op: "GreaterEqual" + input: "bert/encoder/layer_9/attention/output/dropout/random_uniform" + input: "bert/encoder/layer_9/attention/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + 
shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_9/attention/output/dense/BiasAdd" + input: "bert/encoder/layer_9/attention/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dropout/Cast" + op: "Cast" + input: "bert/encoder/layer_9/attention/output/dropout/GreaterEqual" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dropout/mul_1" + op: "Mul" + input: "bert/encoder/layer_9/attention/output/dropout/mul" + input: "bert/encoder/layer_9/attention/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/add" + op: "Add" + input: "bert/encoder/layer_9/attention/output/dropout/mul_1" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/beta/read" + op: "Identity" + input: 
"bert/encoder/layer_9/attention/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_9/attention/output/add" + input: "bert/encoder/layer_9/attention/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/moments/StopGradient" + op: 
"StopGradient" + input: "bert/encoder/layer_9/attention/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_9/attention/output/add" + input: "bert/encoder/layer_9/attention/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_9/attention/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_9/attention/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_9/attention/output/add" + input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_9/attention/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/read" + input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_9/intermediate/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + 
} + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_9/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_9/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_9/intermediate/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_9/intermediate/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/kernel" + input: "bert/encoder/layer_9/intermediate/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_9/intermediate/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: 
"bert/encoder/layer_9/intermediate/dense/bias/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/bias/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_9/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_9/intermediate/dense/bias/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/bias" + input: "bert/encoder/layer_9/intermediate/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_9/intermediate/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_9/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_9/intermediate/dense/MatMul" + input: "bert/encoder/layer_9/intermediate/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: 
"data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/Pow/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 3.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/Pow" + op: "Pow" + input: "bert/encoder/layer_9/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_9/intermediate/dense/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.044714998453855515 + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/mul" + op: "Mul" + input: "bert/encoder/layer_9/intermediate/dense/mul/x" + input: "bert/encoder/layer_9/intermediate/dense/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/add" + op: "Add" + input: "bert/encoder/layer_9/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_9/intermediate/dense/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/mul_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.7978845834732056 + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/mul_1" + op: "Mul" + input: "bert/encoder/layer_9/intermediate/dense/mul_1/x" + input: "bert/encoder/layer_9/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/Tanh" + op: "Tanh" + input: "bert/encoder/layer_9/intermediate/dense/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/add_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/add_1" + op: "Add" + input: "bert/encoder/layer_9/intermediate/dense/add_1/x" + input: "bert/encoder/layer_9/intermediate/dense/Tanh" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + 
} + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/mul_2/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/mul_2" + op: "Mul" + input: "bert/encoder/layer_9/intermediate/dense/mul_2/x" + input: "bert/encoder/layer_9/intermediate/dense/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/mul_3" + op: "Mul" + input: "bert/encoder/layer_9/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_9/intermediate/dense/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_9/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_9/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: 
"bert/encoder/layer_9/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_9/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_9/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/kernel" + input: "bert/encoder/layer_9/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_9/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } 
+ } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/bias" + input: "bert/encoder/layer_9/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_9/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_9/intermediate/dense/mul_3" + input: "bert/encoder/layer_9/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_9/output/dense/MatMul" + input: "bert/encoder/layer_9/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_9/output/dropout/rate" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_9/output/dropout/Shape" + attr 
{ + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_9/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_9/output/dropout/random_uniform/max" + input: "bert/encoder/layer_9/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_9/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_9/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_9/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_9/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dropout/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dropout/sub" + op: "Sub" + input: "bert/encoder/layer_9/output/dropout/sub/x" + input: "bert/encoder/layer_9/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dropout/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dropout/truediv" + op: "RealDiv" + input: "bert/encoder/layer_9/output/dropout/truediv/x" + input: "bert/encoder/layer_9/output/dropout/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dropout/GreaterEqual" + op: "GreaterEqual" + input: "bert/encoder/layer_9/output/dropout/random_uniform" + input: "bert/encoder/layer_9/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_9/output/dense/BiasAdd" + input: "bert/encoder/layer_9/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: 
"bert/encoder/layer_9/output/dropout/Cast" + op: "Cast" + input: "bert/encoder/layer_9/output/dropout/GreaterEqual" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dropout/mul_1" + op: "Mul" + input: "bert/encoder/layer_9/output/dropout/mul" + input: "bert/encoder/layer_9/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/add" + op: "Add" + input: "bert/encoder/layer_9/output/dropout/mul_1" + input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_9/output/LayerNorm/beta" + input: "bert/encoder/layer_9/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_9/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + 
type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_9/output/LayerNorm/gamma" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_9/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_9/output/add" + input: "bert/encoder/layer_9/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_9/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_9/output/add" + input: "bert/encoder/layer_9/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + 
} + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_9/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_9/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_9/output/add" + input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_9/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_9/output/LayerNorm/beta/read" + input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_1" + input: 
"bert/encoder/layer_9/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_10/attention/self/query/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_10/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_10/attention/self/query/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_10/attention/self/query/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_10/attention/self/query/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_10/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/kernel" + input: "bert/encoder/layer_10/attention/self/query/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/self/query/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/bias" + input: "bert/encoder/layer_10/attention/self/query/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { 
+ key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/bias/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/self/query/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/MatMul" + op: "MatMul" + input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_10/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_10/attention/self/query/MatMul" + input: "bert/encoder/layer_10/attention/self/query/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_10/attention/self/key/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + 
value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_10/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_10/attention/self/key/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_10/attention/self/key/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_10/attention/self/key/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/key/kernel" + input: "bert/encoder/layer_10/attention/self/key/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/self/key/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} 
+node { + name: "bert/encoder/layer_10/attention/self/key/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/key/bias" + input: "bert/encoder/layer_10/attention/self/key/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/bias/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/self/key/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/MatMul" + op: "MatMul" + input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_10/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_10/attention/self/key/MatMul" + input: "bert/encoder/layer_10/attention/self/key/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + 
tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_10/attention/self/value/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_10/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_10/attention/self/value/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_10/attention/self/value/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_10/attention/self/value/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/kernel" + input: "bert/encoder/layer_10/attention/self/value/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim 
{ + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/self/value/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/bias" + input: "bert/encoder/layer_10/attention/self/value/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/bias/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/self/value/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/MatMul" + op: "MatMul" + input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_10/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_10/attention/self/value/MatMul" + input: "bert/encoder/layer_10/attention/self/value/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/Reshape" + op: "Reshape" + input: "bert/encoder/layer_10/attention/self/query/BiasAdd" + input: "bert/encoder/layer_10/attention/self/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/transpose" + op: "Transpose" + input: "bert/encoder/layer_10/attention/self/Reshape" + input: "bert/encoder/layer_10/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/Reshape_1" + op: "Reshape" + input: "bert/encoder/layer_10/attention/self/key/BiasAdd" + input: "bert/encoder/layer_10/attention/self/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/transpose_1" + op: "Transpose" + input: 
"bert/encoder/layer_10/attention/self/Reshape_1" + input: "bert/encoder/layer_10/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/MatMul" + op: "BatchMatMulV2" + input: "bert/encoder/layer_10/attention/self/transpose" + input: "bert/encoder/layer_10/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/Mul/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.125 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/Mul" + op: "Mul" + input: "bert/encoder/layer_10/attention/self/MatMul" + input: "bert/encoder/layer_10/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/ExpandDims" + op: "ExpandDims" + input: "bert/encoder/mul" + input: "bert/encoder/layer_10/attention/self/ExpandDims/dim" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tdim" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/sub" + op: "Sub" + input: "bert/encoder/layer_10/attention/self/sub/x" + input: "bert/encoder/layer_10/attention/self/ExpandDims" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/mul_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { 
+ } + float_val: -10000.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/mul_1" + op: "Mul" + input: "bert/encoder/layer_10/attention/self/sub" + input: "bert/encoder/layer_10/attention/self/mul_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/add" + op: "Add" + input: "bert/encoder/layer_10/attention/self/Mul" + input: "bert/encoder/layer_10/attention/self/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/Softmax" + op: "Softmax" + input: "bert/encoder/layer_10/attention/self/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/dropout/rate" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_10/attention/self/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_10/attention/self/dropout/random_uniform/max" + input: "bert/encoder/layer_10/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + 
type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_10/attention/self/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_10/attention/self/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_10/attention/self/dropout/random_uniform/mul" + input: "bert/encoder/layer_10/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/dropout/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/dropout/sub" + op: "Sub" + input: "bert/encoder/layer_10/attention/self/dropout/sub/x" + input: "bert/encoder/layer_10/attention/self/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/dropout/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/dropout/truediv" + op: "RealDiv" + input: "bert/encoder/layer_10/attention/self/dropout/truediv/x" + input: "bert/encoder/layer_10/attention/self/dropout/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/dropout/GreaterEqual" + op: "GreaterEqual" + input: "bert/encoder/layer_10/attention/self/dropout/random_uniform" + input: "bert/encoder/layer_10/attention/self/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_10/attention/self/Softmax" + input: "bert/encoder/layer_10/attention/self/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/dropout/Cast" + op: "Cast" + input: "bert/encoder/layer_10/attention/self/dropout/GreaterEqual" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + 
type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/dropout/mul_1" + op: "Mul" + input: "bert/encoder/layer_10/attention/self/dropout/mul" + input: "bert/encoder/layer_10/attention/self/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/Reshape_2" + op: "Reshape" + input: "bert/encoder/layer_10/attention/self/value/BiasAdd" + input: "bert/encoder/layer_10/attention/self/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/transpose_2/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/transpose_2" + op: "Transpose" + input: "bert/encoder/layer_10/attention/self/Reshape_2" + input: "bert/encoder/layer_10/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/layer_10/attention/self/dropout/mul_1" + input: "bert/encoder/layer_10/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/transpose_3/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } 
+ } + } +} +node { + name: "bert/encoder/layer_10/attention/self/transpose_3" + op: "Transpose" + input: "bert/encoder/layer_10/attention/self/MatMul_1" + input: "bert/encoder/layer_10/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/Reshape_3/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/Reshape_3" + op: "Reshape" + input: "bert/encoder/layer_10/attention/self/transpose_3" + input: "bert/encoder/layer_10/attention/self/Reshape_3/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_10/attention/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + 
key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_10/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_10/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_10/attention/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_10/attention/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/kernel" + input: "bert/encoder/layer_10/attention/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + 
float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/bias" + input: "bert/encoder/layer_10/attention/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_10/attention/self/Reshape_3" + input: "bert/encoder/layer_10/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_10/attention/output/dense/MatMul" + input: "bert/encoder/layer_10/attention/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dropout/rate" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } 
+ } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_10/attention/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_10/attention/output/dropout/random_uniform/max" + input: "bert/encoder/layer_10/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_10/attention/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_10/attention/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_10/attention/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_10/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dropout/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dropout/sub" + op: "Sub" + input: "bert/encoder/layer_10/attention/output/dropout/sub/x" + input: "bert/encoder/layer_10/attention/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dropout/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dropout/truediv" + op: "RealDiv" + input: "bert/encoder/layer_10/attention/output/dropout/truediv/x" + input: 
"bert/encoder/layer_10/attention/output/dropout/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dropout/GreaterEqual" + op: "GreaterEqual" + input: "bert/encoder/layer_10/attention/output/dropout/random_uniform" + input: "bert/encoder/layer_10/attention/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_10/attention/output/dense/BiasAdd" + input: "bert/encoder/layer_10/attention/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dropout/Cast" + op: "Cast" + input: "bert/encoder/layer_10/attention/output/dropout/GreaterEqual" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dropout/mul_1" + op: "Mul" + input: "bert/encoder/layer_10/attention/output/dropout/mul" + input: "bert/encoder/layer_10/attention/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/add" + op: "Add" + input: "bert/encoder/layer_10/attention/output/dropout/mul_1" + input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta" + input: 
"bert/encoder/layer_10/attention/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: 
"bert/encoder/layer_10/attention/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_10/attention/output/add" + input: "bert/encoder/layer_10/attention/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_10/attention/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_10/attention/output/add" + input: "bert/encoder/layer_10/attention/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_10/attention/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_10/attention/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul" + op: "Mul" + 
input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_10/attention/output/add" + input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_10/attention/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/read" + input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + 
} + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_10/intermediate/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_10/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_10/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_10/intermediate/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_10/intermediate/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/kernel" + input: "bert/encoder/layer_10/intermediate/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_10/intermediate/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + 
size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/bias/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/bias/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_10/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_10/intermediate/dense/bias/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/bias" + input: "bert/encoder/layer_10/intermediate/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_10/intermediate/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_10/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value 
{ + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_10/intermediate/dense/MatMul" + input: "bert/encoder/layer_10/intermediate/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/Pow/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 3.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/Pow" + op: "Pow" + input: "bert/encoder/layer_10/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_10/intermediate/dense/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.044714998453855515 + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/mul" + op: "Mul" + input: "bert/encoder/layer_10/intermediate/dense/mul/x" + input: "bert/encoder/layer_10/intermediate/dense/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/add" + op: "Add" + input: "bert/encoder/layer_10/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_10/intermediate/dense/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/mul_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.7978845834732056 + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/mul_1" + op: "Mul" + input: "bert/encoder/layer_10/intermediate/dense/mul_1/x" + input: "bert/encoder/layer_10/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/Tanh" + op: "Tanh" + input: "bert/encoder/layer_10/intermediate/dense/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: 
"bert/encoder/layer_10/intermediate/dense/add_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/add_1" + op: "Add" + input: "bert/encoder/layer_10/intermediate/dense/add_1/x" + input: "bert/encoder/layer_10/intermediate/dense/Tanh" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/mul_2/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/mul_2" + op: "Mul" + input: "bert/encoder/layer_10/intermediate/dense/mul_2/x" + input: "bert/encoder/layer_10/intermediate/dense/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/mul_3" + op: "Mul" + input: "bert/encoder/layer_10/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_10/intermediate/dense/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_10/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { 
+ type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_10/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_10/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_10/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_10/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/kernel" + input: "bert/encoder/layer_10/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_10/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" 
+ value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/bias" + input: "bert/encoder/layer_10/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_10/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_10/intermediate/dense/mul_3" + input: "bert/encoder/layer_10/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_10/output/dense/MatMul" + input: "bert/encoder/layer_10/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_10/output/dropout/rate" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + 
key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_10/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_10/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_10/output/dropout/random_uniform/max" + input: "bert/encoder/layer_10/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_10/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_10/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_10/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_10/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dropout/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dropout/sub" + op: "Sub" + input: "bert/encoder/layer_10/output/dropout/sub/x" + input: "bert/encoder/layer_10/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dropout/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dropout/truediv" + op: "RealDiv" + input: "bert/encoder/layer_10/output/dropout/truediv/x" + input: "bert/encoder/layer_10/output/dropout/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dropout/GreaterEqual" + op: 
"GreaterEqual" + input: "bert/encoder/layer_10/output/dropout/random_uniform" + input: "bert/encoder/layer_10/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_10/output/dense/BiasAdd" + input: "bert/encoder/layer_10/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dropout/Cast" + op: "Cast" + input: "bert/encoder/layer_10/output/dropout/GreaterEqual" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dropout/mul_1" + op: "Mul" + input: "bert/encoder/layer_10/output/dropout/mul" + input: "bert/encoder/layer_10/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/add" + op: "Add" + input: "bert/encoder/layer_10/output/dropout/mul_1" + input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/beta" + input: "bert/encoder/layer_10/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: 
"bert/encoder/layer_10/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_10/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/gamma" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_10/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_10/output/add" + input: "bert/encoder/layer_10/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_10/output/LayerNorm/moments/mean" + attr { + key: "T" 
+ value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_10/output/add" + input: "bert/encoder/layer_10/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_10/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_10/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_10/output/add" + input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_10/output/LayerNorm/moments/mean" + input: 
"bert/encoder/layer_10/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_10/output/LayerNorm/beta/read" + input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_11/attention/self/query/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_11/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + input: 
"bert/encoder/layer_11/attention/self/query/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_11/attention/self/query/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_11/attention/self/query/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/query/kernel" + input: "bert/encoder/layer_11/attention/self/query/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/self/query/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: 
"container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/query/bias" + input: "bert/encoder/layer_11/attention/self/query/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/bias/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/self/query/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/MatMul" + op: "MatMul" + input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_11/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_11/attention/self/query/MatMul" + input: "bert/encoder/layer_11/attention/self/query/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_11/attention/self/key/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_11/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_11/attention/self/key/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_11/attention/self/key/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_11/attention/self/key/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/kernel" + input: "bert/encoder/layer_11/attention/self/key/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/self/key/kernel" + attr { 
+ key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/bias" + input: "bert/encoder/layer_11/attention/self/key/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/bias/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/self/key/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/MatMul" + op: "MatMul" + input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_11/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_11/attention/self/key/MatMul" + input: "bert/encoder/layer_11/attention/self/key/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_11/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_11/attention/self/value/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_11/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_11/attention/self/value/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_11/attention/self/value/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_11/attention/self/value/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + 
list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/kernel" + input: "bert/encoder/layer_11/attention/self/value/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/self/value/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/bias" + input: "bert/encoder/layer_11/attention/self/value/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/bias/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/self/value/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + 
size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/MatMul" + op: "MatMul" + input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_11/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_11/attention/self/value/MatMul" + input: "bert/encoder/layer_11/attention/self/value/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/Reshape" + op: "Reshape" + input: "bert/encoder/layer_11/attention/self/query/BiasAdd" + input: "bert/encoder/layer_11/attention/self/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/transpose" + op: "Transpose" + input: "bert/encoder/layer_11/attention/self/Reshape" + input: "bert/encoder/layer_11/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/Reshape_1" + op: "Reshape" + input: "bert/encoder/layer_11/attention/self/key/BiasAdd" + input: "bert/encoder/layer_11/attention/self/Reshape_1/shape" + attr { + key: "T" + 
value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/transpose_1" + op: "Transpose" + input: "bert/encoder/layer_11/attention/self/Reshape_1" + input: "bert/encoder/layer_11/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/MatMul" + op: "BatchMatMulV2" + input: "bert/encoder/layer_11/attention/self/transpose" + input: "bert/encoder/layer_11/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/Mul/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.125 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/Mul" + op: "Mul" + input: "bert/encoder/layer_11/attention/self/MatMul" + input: "bert/encoder/layer_11/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/ExpandDims" + op: "ExpandDims" + input: "bert/encoder/mul" + input: "bert/encoder/layer_11/attention/self/ExpandDims/dim" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tdim" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + 
key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/sub" + op: "Sub" + input: "bert/encoder/layer_11/attention/self/sub/x" + input: "bert/encoder/layer_11/attention/self/ExpandDims" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/mul_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: -10000.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/mul_1" + op: "Mul" + input: "bert/encoder/layer_11/attention/self/sub" + input: "bert/encoder/layer_11/attention/self/mul_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/add" + op: "Add" + input: "bert/encoder/layer_11/attention/self/Mul" + input: "bert/encoder/layer_11/attention/self/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/Softmax" + op: "Softmax" + input: "bert/encoder/layer_11/attention/self/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/dropout/rate" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: 
"bert/encoder/layer_11/attention/self/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_11/attention/self/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_11/attention/self/dropout/random_uniform/max" + input: "bert/encoder/layer_11/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_11/attention/self/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_11/attention/self/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_11/attention/self/dropout/random_uniform/mul" + input: "bert/encoder/layer_11/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/dropout/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/dropout/sub" + op: "Sub" + input: "bert/encoder/layer_11/attention/self/dropout/sub/x" + input: "bert/encoder/layer_11/attention/self/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/dropout/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/dropout/truediv" + op: "RealDiv" + input: "bert/encoder/layer_11/attention/self/dropout/truediv/x" + input: "bert/encoder/layer_11/attention/self/dropout/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/dropout/GreaterEqual" + op: "GreaterEqual" + input: "bert/encoder/layer_11/attention/self/dropout/random_uniform" + input: "bert/encoder/layer_11/attention/self/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_11/attention/self/Softmax" + input: "bert/encoder/layer_11/attention/self/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/dropout/Cast" + op: "Cast" + input: "bert/encoder/layer_11/attention/self/dropout/GreaterEqual" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/dropout/mul_1" + op: "Mul" + input: "bert/encoder/layer_11/attention/self/dropout/mul" + input: "bert/encoder/layer_11/attention/self/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/Reshape_2" + op: "Reshape" + input: "bert/encoder/layer_11/attention/self/value/BiasAdd" + input: "bert/encoder/layer_11/attention/self/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/transpose_2/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/transpose_2" + op: "Transpose" + input: "bert/encoder/layer_11/attention/self/Reshape_2" + input: "bert/encoder/layer_11/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/layer_11/attention/self/dropout/mul_1" + 
input: "bert/encoder/layer_11/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/transpose_3/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/transpose_3" + op: "Transpose" + input: "bert/encoder/layer_11/attention/self/MatMul_1" + input: "bert/encoder/layer_11/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/Reshape_3/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/Reshape_3" + op: "Reshape" + input: "bert/encoder/layer_11/attention/self/transpose_3" + input: "bert/encoder/layer_11/attention/self/Reshape_3/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + 
list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_11/attention/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_11/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_11/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_11/attention/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_11/attention/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/dense/kernel" + input: "bert/encoder/layer_11/attention/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/read" + op: "Identity" + input: 
"bert/encoder/layer_11/attention/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/dense/bias" + input: "bert/encoder/layer_11/attention/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_11/attention/self/Reshape_3" + input: "bert/encoder/layer_11/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_11/attention/output/dense/MatMul" + input: "bert/encoder/layer_11/attention/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dropout/rate" + op: "Const" + 
attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_11/attention/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_11/attention/output/dropout/random_uniform/max" + input: "bert/encoder/layer_11/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_11/attention/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_11/attention/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_11/attention/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_11/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dropout/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: 
"bert/encoder/layer_11/attention/output/dropout/sub" + op: "Sub" + input: "bert/encoder/layer_11/attention/output/dropout/sub/x" + input: "bert/encoder/layer_11/attention/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dropout/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dropout/truediv" + op: "RealDiv" + input: "bert/encoder/layer_11/attention/output/dropout/truediv/x" + input: "bert/encoder/layer_11/attention/output/dropout/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dropout/GreaterEqual" + op: "GreaterEqual" + input: "bert/encoder/layer_11/attention/output/dropout/random_uniform" + input: "bert/encoder/layer_11/attention/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_11/attention/output/dense/BiasAdd" + input: "bert/encoder/layer_11/attention/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dropout/Cast" + op: "Cast" + input: "bert/encoder/layer_11/attention/output/dropout/GreaterEqual" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dropout/mul_1" + op: "Mul" + input: "bert/encoder/layer_11/attention/output/dropout/mul" + input: "bert/encoder/layer_11/attention/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/add" + op: "Add" + input: "bert/encoder/layer_11/attention/output/dropout/mul_1" + input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + 
size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/read" + op: "Identity" + input: 
"bert/encoder/layer_11/attention/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_11/attention/output/add" + input: "bert/encoder/layer_11/attention/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_11/attention/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_11/attention/output/add" + input: "bert/encoder/layer_11/attention/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_11/attention/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add" + op: "Add" + input: 
"bert/encoder/layer_11/attention/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_11/attention/output/add" + input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_11/attention/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/read" + input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_11/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_11/intermediate/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_11/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_11/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_11/intermediate/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_11/intermediate/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_11/intermediate/dense/kernel" + input: "bert/encoder/layer_11/intermediate/dense/kernel/Initializer/truncated_normal" + 
attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_11/intermediate/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/bias/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/bias/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_11/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_11/intermediate/dense/bias/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_11/intermediate/dense/bias" + input: "bert/encoder/layer_11/intermediate/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: 
true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_11/intermediate/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_11/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_11/intermediate/dense/MatMul" + input: "bert/encoder/layer_11/intermediate/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/Pow/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 3.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/Pow" + op: "Pow" + input: "bert/encoder/layer_11/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_11/intermediate/dense/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.044714998453855515 + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/mul" + op: "Mul" + input: "bert/encoder/layer_11/intermediate/dense/mul/x" + input: "bert/encoder/layer_11/intermediate/dense/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/add" + op: "Add" + input: "bert/encoder/layer_11/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_11/intermediate/dense/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/mul_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: 
DT_FLOAT + tensor_shape { + } + float_val: 0.7978845834732056 + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/mul_1" + op: "Mul" + input: "bert/encoder/layer_11/intermediate/dense/mul_1/x" + input: "bert/encoder/layer_11/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/Tanh" + op: "Tanh" + input: "bert/encoder/layer_11/intermediate/dense/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/add_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/add_1" + op: "Add" + input: "bert/encoder/layer_11/intermediate/dense/add_1/x" + input: "bert/encoder/layer_11/intermediate/dense/Tanh" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/mul_2/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/mul_2" + op: "Mul" + input: "bert/encoder/layer_11/intermediate/dense/mul_2/x" + input: "bert/encoder/layer_11/intermediate/dense/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/mul_3" + op: "Mul" + input: "bert/encoder/layer_11/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_11/intermediate/dense/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: 
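The Pow / mul / Tanh chain in the intermediate dense block is the tanh approximation of GELU used by the BERT reference implementation, 0.5·x·(1 + tanh(√(2/π)·(x + 0.044715·x³))), with √(2/π) serialized as the float32 constant 0.7978845834732056. A one-function NumPy sketch:

import numpy as np

def gelu(x):
    """Tanh approximation of GELU: the Pow (x**3), mul (0.044715),
    mul_1 (sqrt(2/pi)), Tanh, add_1 (+1) and mul_2 (*0.5) nodes."""
    cdf = 0.5 * (1.0 + np.tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * np.power(x, 3))))
    return x * cdf          # the final mul_3 against the BiasAdd output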
DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_11/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_11/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_11/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_11/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_11/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/kernel" + input: "bert/encoder/layer_11/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: 
"validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_11/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/bias" + input: "bert/encoder/layer_11/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_11/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_11/intermediate/dense/mul_3" + input: "bert/encoder/layer_11/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_11/output/dense/MatMul" + input: "bert/encoder/layer_11/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_11/output/dropout/rate" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + 
shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_11/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_11/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_11/output/dropout/random_uniform/max" + input: "bert/encoder/layer_11/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_11/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_11/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_11/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_11/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dropout/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dropout/sub" + op: "Sub" + input: "bert/encoder/layer_11/output/dropout/sub/x" + input: "bert/encoder/layer_11/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dropout/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dropout/truediv" + op: "RealDiv" + input: "bert/encoder/layer_11/output/dropout/truediv/x" + input: "bert/encoder/layer_11/output/dropout/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dropout/GreaterEqual" + op: "GreaterEqual" + input: "bert/encoder/layer_11/output/dropout/random_uniform" + input: "bert/encoder/layer_11/output/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_11/output/dense/BiasAdd" + input: "bert/encoder/layer_11/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dropout/Cast" + op: "Cast" + input: "bert/encoder/layer_11/output/dropout/GreaterEqual" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dropout/mul_1" + op: "Mul" + input: "bert/encoder/layer_11/output/dropout/mul" + input: "bert/encoder/layer_11/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/add" + op: "Add" + input: "bert/encoder/layer_11/output/dropout/mul_1" + input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: 
DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/beta" + input: "bert/encoder/layer_11/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_11/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/gamma" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_11/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + 
dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_11/output/add" + input: "bert/encoder/layer_11/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_11/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_11/output/add" + input: "bert/encoder/layer_11/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_11/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_11/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_11/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_11/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_11/output/LayerNorm/batchnorm/Rsqrt" + input: 
"bert/encoder/layer_11/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_11/output/add" + input: "bert/encoder/layer_11/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_11/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_11/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_11/output/LayerNorm/beta/read" + input: "bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_11/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/Reshape_2" + op: "Reshape" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/Reshape_3/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/Reshape_3" + op: "Reshape" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/Reshape_3/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/Reshape_4/shape" + op: "Const" + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/Reshape_4" + op: "Reshape" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/Reshape_4/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/Reshape_5/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/Reshape_5" + op: "Reshape" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/Reshape_5/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/Reshape_6/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/Reshape_6" + op: "Reshape" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/Reshape_6/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/Reshape_7/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/Reshape_7" + op: "Reshape" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/Reshape_7/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/Reshape_8/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + 
tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/Reshape_8" + op: "Reshape" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/Reshape_8/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/Reshape_9/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/Reshape_9" + op: "Reshape" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/Reshape_9/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/Reshape_10/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/Reshape_10" + op: "Reshape" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/Reshape_10/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/Reshape_11/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/Reshape_11" + op: "Reshape" + input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/Reshape_11/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/Reshape_12/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/Reshape_12" + op: "Reshape" + input: 
"bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/Reshape_12/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/Reshape_13/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/Reshape_13" + op: "Reshape" + input: "bert/encoder/layer_11/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/Reshape_13/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/pooler/strided_slice/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\000\000\000\000\000\000\000\000" + } + } + } +} +node { + name: "bert/pooler/strided_slice/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\000\000\000\000" + } + } + } +} +node { + name: "bert/pooler/strided_slice/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/pooler/strided_slice" + op: "StridedSlice" + input: "bert/encoder/Reshape_13" + input: "bert/pooler/strided_slice/stack" + input: "bert/pooler/strided_slice/stack_1" + input: "bert/pooler/strided_slice/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 5 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 5 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "bert/pooler/Squeeze" + op: "Squeeze" + input: "bert/pooler/strided_slice" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "squeeze_dims" + value { + list { + i: 1 + 
} + } + } +} +node { + name: "bert/pooler/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/pooler/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/pooler/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/pooler/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/pooler/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/pooler/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/pooler/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/pooler/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/pooler/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/pooler/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/pooler/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/pooler/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + 
} + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/pooler/dense/kernel/Assign" + op: "Assign" + input: "bert/pooler/dense/kernel" + input: "bert/pooler/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/pooler/dense/kernel/read" + op: "Identity" + input: "bert/pooler/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/pooler/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/pooler/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/pooler/dense/bias/Assign" + op: "Assign" + input: "bert/pooler/dense/bias" + input: "bert/pooler/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/pooler/dense/bias/read" + op: "Identity" + input: "bert/pooler/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/pooler/dense/MatMul" + op: "MatMul" + input: "bert/pooler/Squeeze" + input: "bert/pooler/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/pooler/dense/BiasAdd" + op: "BiasAdd" + input: "bert/pooler/dense/MatMul" + input: "bert/pooler/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + 
size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/pooler/dense/Tanh" + op: "Tanh" + input: "bert/pooler/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "output_weights/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\003\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "output_weights/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "output_weights/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "output_weights/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "output_weights/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "output_weights/Initializer/truncated_normal/mul" + op: "Mul" + input: "output_weights/Initializer/truncated_normal/TruncatedNormal" + input: "output_weights/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "output_weights/Initializer/truncated_normal" + op: "Add" + input: "output_weights/Initializer/truncated_normal/mul" + input: "output_weights/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "output_weights" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + 
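[Editor's note] The bert/pooler/* nodes above implement BERT's pooler: the StridedSlice (begin_mask = end_mask = 5) with the Squeeze selects the first ([CLS]) token of the final [32, 128, 768] encoder output, and a 768×768 dense layer with tanh produces the pooled [32, 768] representation. A NumPy sketch with placeholder weights, for illustration only:

```python
import numpy as np

def pooler(sequence_output, kernel, bias):
    first_token = sequence_output[:, 0, :]       # strided_slice + Squeeze -> [32, 768]
    return np.tanh(first_token @ kernel + bias)  # dense/MatMul + BiasAdd + Tanh

seq_out = np.random.randn(32, 128, 768).astype(np.float32)
kernel = (np.random.randn(768, 768) * 0.02).astype(np.float32)  # placeholder weights
pooled = pooler(seq_out, kernel, np.zeros(768, np.float32))
print(pooled.shape)  # (32, 768)
```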
attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "output_weights/Assign" + op: "Assign" + input: "output_weights" + input: "output_weights/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "output_weights/read" + op: "Identity" + input: "output_weights" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "output_bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 3 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "output_bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "output_bias/Assign" + op: "Assign" + input: "output_bias" + input: "output_bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "output_bias/read" + op: "Identity" + input: "output_bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "loss/dropout/rate" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "loss/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: " \000\000\000\000\003\000\000" + } + } + } +} +node { + name: "loss/dropout/random_uniform/min" + op: "Const" + 
attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "loss/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "loss/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "loss/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "loss/dropout/random_uniform/sub" + op: "Sub" + input: "loss/dropout/random_uniform/max" + input: "loss/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "loss/dropout/random_uniform/mul" + op: "Mul" + input: "loss/dropout/random_uniform/RandomUniform" + input: "loss/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "loss/dropout/random_uniform" + op: "Add" + input: "loss/dropout/random_uniform/mul" + input: "loss/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "loss/dropout/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "loss/dropout/sub" + op: "Sub" + input: "loss/dropout/sub/x" + input: "loss/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "loss/dropout/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "loss/dropout/truediv" + op: "RealDiv" + input: "loss/dropout/truediv/x" + input: "loss/dropout/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "loss/dropout/GreaterEqual" + op: "GreaterEqual" + input: "loss/dropout/random_uniform" + input: "loss/dropout/rate" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "loss/dropout/mul" + op: "Mul" + input: "bert/pooler/dense/Tanh" + input: "loss/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT 
+ } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "loss/dropout/Cast" + op: "Cast" + input: "loss/dropout/GreaterEqual" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "loss/dropout/mul_1" + op: "Mul" + input: "loss/dropout/mul" + input: "loss/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "loss/MatMul" + op: "MatMul" + input: "loss/dropout/mul_1" + input: "output_weights/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 3 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "loss/BiasAdd" + op: "BiasAdd" + input: "loss/MatMul" + input: "output_bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 3 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "loss/Softmax" + op: "Softmax" + input: "loss/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 3 + } + } + } + } + } +} +node { + name: "loss/LogSoftmax" + op: "LogSoftmax" + input: "loss/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 3 + } + } + } + } + } +} +node { + name: "loss/one_hot/on_value" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "loss/one_hot/off_value" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "loss/one_hot/depth" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 3 + } + } + } +} +node { + name: "loss/one_hot" + op: "OneHot" + input: "IteratorGetNext:3" + input: "loss/one_hot/depth" + input: "loss/one_hot/on_value" + input: "loss/one_hot/off_value" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "TI" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 3 + } + } + } + } + } + attr { + key: "axis" + value { + i: -1 + } + } +} +node { + name: "loss/mul" + op: "Mul" + input: "loss/one_hot" + input: "loss/LogSoftmax" + attr { + key: "T" + value 
{ + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 3 + } + } + } + } + } +} +node { + name: "loss/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "loss/Sum" + op: "Sum" + input: "loss/mul" + input: "loss/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "loss/Neg" + op: "Neg" + input: "loss/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + } + } + } + } +} +node { + name: "loss/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "loss/Mean" + op: "Mean" + input: "loss/Neg" + input: "loss/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "checkpoint_initializer/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/embeddings/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer" + op: "RestoreV2" + input: "checkpoint_initializer/prefix" + input: "checkpoint_initializer/tensor_names" + input: "checkpoint_initializer/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign" + op: "Assign" + input: "bert/embeddings/LayerNorm/beta" + input: "checkpoint_initializer" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
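[Editor's note] The output_weights ([3, 768]), output_bias ([3]) and loss/* nodes above define a 3-class classification head: dropout on the pooled output, logits via MatMul (transpose_b = true) plus bias, then softmax cross-entropy against one-hot labels (depth 3, labels from IteratorGetNext:3) averaged over the batch of 32. A NumPy sketch of that computation with placeholder weights, for illustration only:

```python
import numpy as np

def classifier_loss(pooled, labels, output_weights, output_bias):
    logits = pooled @ output_weights.T + output_bias               # MatMul(transpose_b) + BiasAdd
    z = logits - logits.max(axis=-1, keepdims=True)
    log_probs = z - np.log(np.exp(z).sum(axis=-1, keepdims=True))  # LogSoftmax
    one_hot = np.eye(output_weights.shape[0], dtype=np.float32)[labels]  # one_hot, depth=3
    per_example_loss = -(one_hot * log_probs).sum(axis=-1)         # mul + Sum + Neg
    return per_example_loss.mean()                                 # Mean

pooled = np.random.randn(32, 768).astype(np.float32)
labels = np.random.randint(0, 3, size=32)
W = (np.random.randn(3, 768) * 0.02).astype(np.float32)            # placeholder weights
print(classifier_loss(pooled, labels, W, np.zeros(3, np.float32)))
```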
"loc:@bert/embeddings/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_1/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_1/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/embeddings/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_1/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_1" + op: "RestoreV2" + input: "checkpoint_initializer_1/prefix" + input: "checkpoint_initializer_1/tensor_names" + input: "checkpoint_initializer_1/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_1" + op: "Assign" + input: "bert/embeddings/LayerNorm/gamma" + input: "checkpoint_initializer_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_2/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_2/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/embeddings/position_embeddings" + } + } + } +} +node { + name: "checkpoint_initializer_2/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + 
tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_2" + op: "RestoreV2" + input: "checkpoint_initializer_2/prefix" + input: "checkpoint_initializer_2/tensor_names" + input: "checkpoint_initializer_2/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_2" + op: "Assign" + input: "bert/embeddings/position_embeddings" + input: "checkpoint_initializer_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_3/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_3/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/embeddings/token_type_embeddings" + } + } + } +} +node { + name: "checkpoint_initializer_3/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_3" + op: "RestoreV2" + input: "checkpoint_initializer_3/prefix" + input: "checkpoint_initializer_3/tensor_names" + input: "checkpoint_initializer_3/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_3" + op: "Assign" + input: "bert/embeddings/token_type_embeddings" + input: "checkpoint_initializer_3" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_4/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + 
string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_4/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/embeddings/word_embeddings" + } + } + } +} +node { + name: "checkpoint_initializer_4/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_4" + op: "RestoreV2" + input: "checkpoint_initializer_4/prefix" + input: "checkpoint_initializer_4/tensor_names" + input: "checkpoint_initializer_4/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_4" + op: "Assign" + input: "bert/embeddings/word_embeddings" + input: "checkpoint_initializer_4" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_5/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_5/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_5/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_5" + op: "RestoreV2" + input: "checkpoint_initializer_5/prefix" + input: "checkpoint_initializer_5/tensor_names" + input: "checkpoint_initializer_5/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_5" + op: 
"Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta" + input: "checkpoint_initializer_5" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_6/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_6/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_6/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_6" + op: "RestoreV2" + input: "checkpoint_initializer_6/prefix" + input: "checkpoint_initializer_6/tensor_names" + input: "checkpoint_initializer_6/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_6" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma" + input: "checkpoint_initializer_6" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_7/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_7/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_0/attention/output/dense/bias" + } + } + } +} +node { + 
name: "checkpoint_initializer_7/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_7" + op: "RestoreV2" + input: "checkpoint_initializer_7/prefix" + input: "checkpoint_initializer_7/tensor_names" + input: "checkpoint_initializer_7/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_7" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/bias" + input: "checkpoint_initializer_7" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_8/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_8/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_0/attention/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_8/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_8" + op: "RestoreV2" + input: "checkpoint_initializer_8/prefix" + input: "checkpoint_initializer_8/tensor_names" + input: "checkpoint_initializer_8/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_8" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/kernel" + input: "checkpoint_initializer_8" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: 
true + } + } +} +node { + name: "checkpoint_initializer_9/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_9/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_0/attention/self/key/bias" + } + } + } +} +node { + name: "checkpoint_initializer_9/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_9" + op: "RestoreV2" + input: "checkpoint_initializer_9/prefix" + input: "checkpoint_initializer_9/tensor_names" + input: "checkpoint_initializer_9/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_9" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/bias" + input: "checkpoint_initializer_9" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_10/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_10/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_0/attention/self/key/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_10/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_10" + op: "RestoreV2" + input: "checkpoint_initializer_10/prefix" + input: 
"checkpoint_initializer_10/tensor_names" + input: "checkpoint_initializer_10/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_10" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/kernel" + input: "checkpoint_initializer_10" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_11/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_11/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_0/attention/self/query/bias" + } + } + } +} +node { + name: "checkpoint_initializer_11/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_11" + op: "RestoreV2" + input: "checkpoint_initializer_11/prefix" + input: "checkpoint_initializer_11/tensor_names" + input: "checkpoint_initializer_11/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_11" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/bias" + input: "checkpoint_initializer_11" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_12/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_12/tensor_names" + op: "Const" + device: "/device:CPU:0" 
+ attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_0/attention/self/query/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_12/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_12" + op: "RestoreV2" + input: "checkpoint_initializer_12/prefix" + input: "checkpoint_initializer_12/tensor_names" + input: "checkpoint_initializer_12/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_12" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/kernel" + input: "checkpoint_initializer_12" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_13/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_13/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_0/attention/self/value/bias" + } + } + } +} +node { + name: "checkpoint_initializer_13/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_13" + op: "RestoreV2" + input: "checkpoint_initializer_13/prefix" + input: "checkpoint_initializer_13/tensor_names" + input: "checkpoint_initializer_13/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_13" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/value/bias" + input: "checkpoint_initializer_13" + attr { + key: "T" 
+ value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_14/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_14/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_0/attention/self/value/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_14/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_14" + op: "RestoreV2" + input: "checkpoint_initializer_14/prefix" + input: "checkpoint_initializer_14/tensor_names" + input: "checkpoint_initializer_14/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_14" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/value/kernel" + input: "checkpoint_initializer_14" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_15/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_15/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_0/intermediate/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_15/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr 
{ + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_15" + op: "RestoreV2" + input: "checkpoint_initializer_15/prefix" + input: "checkpoint_initializer_15/tensor_names" + input: "checkpoint_initializer_15/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_15" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/bias" + input: "checkpoint_initializer_15" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_16/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_16/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_0/intermediate/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_16/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_16" + op: "RestoreV2" + input: "checkpoint_initializer_16/prefix" + input: "checkpoint_initializer_16/tensor_names" + input: "checkpoint_initializer_16/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_16" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/kernel" + input: "checkpoint_initializer_16" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_17/prefix" + op: "Const" + device: 
"/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_17/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_0/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_17/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_17" + op: "RestoreV2" + input: "checkpoint_initializer_17/prefix" + input: "checkpoint_initializer_17/tensor_names" + input: "checkpoint_initializer_17/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_17" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/beta" + input: "checkpoint_initializer_17" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_18/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_18/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_0/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_18/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_18" + op: "RestoreV2" + input: "checkpoint_initializer_18/prefix" + input: "checkpoint_initializer_18/tensor_names" + input: "checkpoint_initializer_18/shape_and_slices" + device: 
"/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_18" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/gamma" + input: "checkpoint_initializer_18" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_19/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_19/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_0/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_19/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_19" + op: "RestoreV2" + input: "checkpoint_initializer_19/prefix" + input: "checkpoint_initializer_19/tensor_names" + input: "checkpoint_initializer_19/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_19" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/bias" + input: "checkpoint_initializer_19" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_20/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_20/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" 
+ value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_0/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_20/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_20" + op: "RestoreV2" + input: "checkpoint_initializer_20/prefix" + input: "checkpoint_initializer_20/tensor_names" + input: "checkpoint_initializer_20/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_20" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/kernel" + input: "checkpoint_initializer_20" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_21/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_21/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_21/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_21" + op: "RestoreV2" + input: "checkpoint_initializer_21/prefix" + input: "checkpoint_initializer_21/tensor_names" + input: "checkpoint_initializer_21/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_21" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta" + input: "checkpoint_initializer_21" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + 
value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_22/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_22/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_22/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_22" + op: "RestoreV2" + input: "checkpoint_initializer_22/prefix" + input: "checkpoint_initializer_22/tensor_names" + input: "checkpoint_initializer_22/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_22" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma" + input: "checkpoint_initializer_22" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_23/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_23/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_1/attention/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_23/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: 
DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_23" + op: "RestoreV2" + input: "checkpoint_initializer_23/prefix" + input: "checkpoint_initializer_23/tensor_names" + input: "checkpoint_initializer_23/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_23" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/bias" + input: "checkpoint_initializer_23" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_24/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_24/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_1/attention/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_24/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_24" + op: "RestoreV2" + input: "checkpoint_initializer_24/prefix" + input: "checkpoint_initializer_24/tensor_names" + input: "checkpoint_initializer_24/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_24" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/kernel" + input: "checkpoint_initializer_24" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_25/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + 
dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_25/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_1/attention/self/key/bias" + } + } + } +} +node { + name: "checkpoint_initializer_25/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_25" + op: "RestoreV2" + input: "checkpoint_initializer_25/prefix" + input: "checkpoint_initializer_25/tensor_names" + input: "checkpoint_initializer_25/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_25" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/bias" + input: "checkpoint_initializer_25" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_26/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_26/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_1/attention/self/key/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_26/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_26" + op: "RestoreV2" + input: "checkpoint_initializer_26/prefix" + input: "checkpoint_initializer_26/tensor_names" + input: "checkpoint_initializer_26/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: 
DT_FLOAT + } + } + } +} +node { + name: "Assign_26" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/kernel" + input: "checkpoint_initializer_26" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_27/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_27/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_1/attention/self/query/bias" + } + } + } +} +node { + name: "checkpoint_initializer_27/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_27" + op: "RestoreV2" + input: "checkpoint_initializer_27/prefix" + input: "checkpoint_initializer_27/tensor_names" + input: "checkpoint_initializer_27/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_27" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/bias" + input: "checkpoint_initializer_27" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_28/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_28/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: 
"bert/encoder/layer_1/attention/self/query/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_28/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_28" + op: "RestoreV2" + input: "checkpoint_initializer_28/prefix" + input: "checkpoint_initializer_28/tensor_names" + input: "checkpoint_initializer_28/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_28" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/kernel" + input: "checkpoint_initializer_28" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_29/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_29/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_1/attention/self/value/bias" + } + } + } +} +node { + name: "checkpoint_initializer_29/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_29" + op: "RestoreV2" + input: "checkpoint_initializer_29/prefix" + input: "checkpoint_initializer_29/tensor_names" + input: "checkpoint_initializer_29/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_29" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/bias" + input: "checkpoint_initializer_29" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" 
+ value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_30/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_30/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_1/attention/self/value/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_30/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_30" + op: "RestoreV2" + input: "checkpoint_initializer_30/prefix" + input: "checkpoint_initializer_30/tensor_names" + input: "checkpoint_initializer_30/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_30" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/kernel" + input: "checkpoint_initializer_30" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_31/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_31/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_1/intermediate/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_31/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + 
} + } +} +node { + name: "checkpoint_initializer_31" + op: "RestoreV2" + input: "checkpoint_initializer_31/prefix" + input: "checkpoint_initializer_31/tensor_names" + input: "checkpoint_initializer_31/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_31" + op: "Assign" + input: "bert/encoder/layer_1/intermediate/dense/bias" + input: "checkpoint_initializer_31" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_32/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_32/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_1/intermediate/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_32/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_32" + op: "RestoreV2" + input: "checkpoint_initializer_32/prefix" + input: "checkpoint_initializer_32/tensor_names" + input: "checkpoint_initializer_32/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_32" + op: "Assign" + input: "bert/encoder/layer_1/intermediate/dense/kernel" + input: "checkpoint_initializer_32" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_33/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: 
"./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_33/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_1/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_33/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_33" + op: "RestoreV2" + input: "checkpoint_initializer_33/prefix" + input: "checkpoint_initializer_33/tensor_names" + input: "checkpoint_initializer_33/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_33" + op: "Assign" + input: "bert/encoder/layer_1/output/LayerNorm/beta" + input: "checkpoint_initializer_33" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_34/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_34/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_1/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_34/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_34" + op: "RestoreV2" + input: "checkpoint_initializer_34/prefix" + input: "checkpoint_initializer_34/tensor_names" + input: "checkpoint_initializer_34/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_34" + op: "Assign" + input: 
"bert/encoder/layer_1/output/LayerNorm/gamma" + input: "checkpoint_initializer_34" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_35/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_35/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_1/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_35/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_35" + op: "RestoreV2" + input: "checkpoint_initializer_35/prefix" + input: "checkpoint_initializer_35/tensor_names" + input: "checkpoint_initializer_35/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_35" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/bias" + input: "checkpoint_initializer_35" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_36/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_36/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_1/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_36/shape_and_slices" + op: "Const" + device: 
"/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_36" + op: "RestoreV2" + input: "checkpoint_initializer_36/prefix" + input: "checkpoint_initializer_36/tensor_names" + input: "checkpoint_initializer_36/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_36" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/kernel" + input: "checkpoint_initializer_36" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_37/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_37/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_37/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_37" + op: "RestoreV2" + input: "checkpoint_initializer_37/prefix" + input: "checkpoint_initializer_37/tensor_names" + input: "checkpoint_initializer_37/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_37" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta" + input: "checkpoint_initializer_37" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_38/prefix" 
+ op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_38/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_38/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_38" + op: "RestoreV2" + input: "checkpoint_initializer_38/prefix" + input: "checkpoint_initializer_38/tensor_names" + input: "checkpoint_initializer_38/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_38" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma" + input: "checkpoint_initializer_38" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_39/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_39/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_10/attention/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_39/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_39" + op: "RestoreV2" + input: "checkpoint_initializer_39/prefix" + input: "checkpoint_initializer_39/tensor_names" + 
input: "checkpoint_initializer_39/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_39" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/bias" + input: "checkpoint_initializer_39" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_40/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_40/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_10/attention/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_40/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_40" + op: "RestoreV2" + input: "checkpoint_initializer_40/prefix" + input: "checkpoint_initializer_40/tensor_names" + input: "checkpoint_initializer_40/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_40" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/kernel" + input: "checkpoint_initializer_40" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_41/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_41/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_10/attention/self/key/bias" + } + } + } +} +node { + name: "checkpoint_initializer_41/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_41" + op: "RestoreV2" + input: "checkpoint_initializer_41/prefix" + input: "checkpoint_initializer_41/tensor_names" + input: "checkpoint_initializer_41/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_41" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/key/bias" + input: "checkpoint_initializer_41" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_42/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_42/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_10/attention/self/key/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_42/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_42" + op: "RestoreV2" + input: "checkpoint_initializer_42/prefix" + input: "checkpoint_initializer_42/tensor_names" + input: "checkpoint_initializer_42/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_42" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/key/kernel" + input: "checkpoint_initializer_42" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + 
key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_43/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_43/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_10/attention/self/query/bias" + } + } + } +} +node { + name: "checkpoint_initializer_43/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_43" + op: "RestoreV2" + input: "checkpoint_initializer_43/prefix" + input: "checkpoint_initializer_43/tensor_names" + input: "checkpoint_initializer_43/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_43" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/bias" + input: "checkpoint_initializer_43" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_44/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_44/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_10/attention/self/query/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_44/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + 
size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_44" + op: "RestoreV2" + input: "checkpoint_initializer_44/prefix" + input: "checkpoint_initializer_44/tensor_names" + input: "checkpoint_initializer_44/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_44" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/kernel" + input: "checkpoint_initializer_44" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_45/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_45/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_10/attention/self/value/bias" + } + } + } +} +node { + name: "checkpoint_initializer_45/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_45" + op: "RestoreV2" + input: "checkpoint_initializer_45/prefix" + input: "checkpoint_initializer_45/tensor_names" + input: "checkpoint_initializer_45/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_45" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/bias" + input: "checkpoint_initializer_45" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_46/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list 
{ + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_46/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_10/attention/self/value/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_46/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_46" + op: "RestoreV2" + input: "checkpoint_initializer_46/prefix" + input: "checkpoint_initializer_46/tensor_names" + input: "checkpoint_initializer_46/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_46" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/kernel" + input: "checkpoint_initializer_46" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_47/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_47/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_10/intermediate/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_47/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_47" + op: "RestoreV2" + input: "checkpoint_initializer_47/prefix" + input: "checkpoint_initializer_47/tensor_names" + input: "checkpoint_initializer_47/shape_and_slices" + device: 
"/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_47" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/bias" + input: "checkpoint_initializer_47" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_48/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_48/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_10/intermediate/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_48/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_48" + op: "RestoreV2" + input: "checkpoint_initializer_48/prefix" + input: "checkpoint_initializer_48/tensor_names" + input: "checkpoint_initializer_48/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_48" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/kernel" + input: "checkpoint_initializer_48" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_49/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_49/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } 
+ attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_10/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_49/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_49" + op: "RestoreV2" + input: "checkpoint_initializer_49/prefix" + input: "checkpoint_initializer_49/tensor_names" + input: "checkpoint_initializer_49/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_49" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/beta" + input: "checkpoint_initializer_49" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_50/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_50/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_10/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_50/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_50" + op: "RestoreV2" + input: "checkpoint_initializer_50/prefix" + input: "checkpoint_initializer_50/tensor_names" + input: "checkpoint_initializer_50/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_50" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/gamma" + input: "checkpoint_initializer_50" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_51/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_51/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_10/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_51/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_51" + op: "RestoreV2" + input: "checkpoint_initializer_51/prefix" + input: "checkpoint_initializer_51/tensor_names" + input: "checkpoint_initializer_51/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_51" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/bias" + input: "checkpoint_initializer_51" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_52/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_52/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_10/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_52/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + 
size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_52" + op: "RestoreV2" + input: "checkpoint_initializer_52/prefix" + input: "checkpoint_initializer_52/tensor_names" + input: "checkpoint_initializer_52/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_52" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/kernel" + input: "checkpoint_initializer_52" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_53/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_53/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_53/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_53" + op: "RestoreV2" + input: "checkpoint_initializer_53/prefix" + input: "checkpoint_initializer_53/tensor_names" + input: "checkpoint_initializer_53/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_53" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta" + input: "checkpoint_initializer_53" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_54/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + 
string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_54/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_54/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_54" + op: "RestoreV2" + input: "checkpoint_initializer_54/prefix" + input: "checkpoint_initializer_54/tensor_names" + input: "checkpoint_initializer_54/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_54" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma" + input: "checkpoint_initializer_54" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_55/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_55/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_11/attention/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_55/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_55" + op: "RestoreV2" + input: "checkpoint_initializer_55/prefix" + input: "checkpoint_initializer_55/tensor_names" + input: "checkpoint_initializer_55/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + 
name: "Assign_55" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/dense/bias" + input: "checkpoint_initializer_55" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_56/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_56/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_11/attention/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_56/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_56" + op: "RestoreV2" + input: "checkpoint_initializer_56/prefix" + input: "checkpoint_initializer_56/tensor_names" + input: "checkpoint_initializer_56/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_56" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/dense/kernel" + input: "checkpoint_initializer_56" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_57/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_57/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: 
"bert/encoder/layer_11/attention/self/key/bias" + } + } + } +} +node { + name: "checkpoint_initializer_57/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_57" + op: "RestoreV2" + input: "checkpoint_initializer_57/prefix" + input: "checkpoint_initializer_57/tensor_names" + input: "checkpoint_initializer_57/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_57" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/bias" + input: "checkpoint_initializer_57" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_58/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_58/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_11/attention/self/key/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_58/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_58" + op: "RestoreV2" + input: "checkpoint_initializer_58/prefix" + input: "checkpoint_initializer_58/tensor_names" + input: "checkpoint_initializer_58/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_58" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/kernel" + input: "checkpoint_initializer_58" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + 
value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_59/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_59/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_11/attention/self/query/bias" + } + } + } +} +node { + name: "checkpoint_initializer_59/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_59" + op: "RestoreV2" + input: "checkpoint_initializer_59/prefix" + input: "checkpoint_initializer_59/tensor_names" + input: "checkpoint_initializer_59/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_59" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/query/bias" + input: "checkpoint_initializer_59" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_60/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_60/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_11/attention/self/query/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_60/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: 
"checkpoint_initializer_60" + op: "RestoreV2" + input: "checkpoint_initializer_60/prefix" + input: "checkpoint_initializer_60/tensor_names" + input: "checkpoint_initializer_60/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_60" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/query/kernel" + input: "checkpoint_initializer_60" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_61/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_61/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_11/attention/self/value/bias" + } + } + } +} +node { + name: "checkpoint_initializer_61/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_61" + op: "RestoreV2" + input: "checkpoint_initializer_61/prefix" + input: "checkpoint_initializer_61/tensor_names" + input: "checkpoint_initializer_61/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_61" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/bias" + input: "checkpoint_initializer_61" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_62/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } 
+ } + } +} +node { + name: "checkpoint_initializer_62/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_11/attention/self/value/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_62/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_62" + op: "RestoreV2" + input: "checkpoint_initializer_62/prefix" + input: "checkpoint_initializer_62/tensor_names" + input: "checkpoint_initializer_62/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_62" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/kernel" + input: "checkpoint_initializer_62" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_63/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_63/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_11/intermediate/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_63/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_63" + op: "RestoreV2" + input: "checkpoint_initializer_63/prefix" + input: "checkpoint_initializer_63/tensor_names" + input: "checkpoint_initializer_63/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_63" + op: "Assign" + 
input: "bert/encoder/layer_11/intermediate/dense/bias" + input: "checkpoint_initializer_63" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_64/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_64/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_11/intermediate/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_64/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_64" + op: "RestoreV2" + input: "checkpoint_initializer_64/prefix" + input: "checkpoint_initializer_64/tensor_names" + input: "checkpoint_initializer_64/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_64" + op: "Assign" + input: "bert/encoder/layer_11/intermediate/dense/kernel" + input: "checkpoint_initializer_64" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_65/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_65/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_11/output/LayerNorm/beta" + } + } + } 
+} +node { + name: "checkpoint_initializer_65/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_65" + op: "RestoreV2" + input: "checkpoint_initializer_65/prefix" + input: "checkpoint_initializer_65/tensor_names" + input: "checkpoint_initializer_65/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_65" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/beta" + input: "checkpoint_initializer_65" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_66/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_66/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_11/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_66/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_66" + op: "RestoreV2" + input: "checkpoint_initializer_66/prefix" + input: "checkpoint_initializer_66/tensor_names" + input: "checkpoint_initializer_66/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_66" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/gamma" + input: "checkpoint_initializer_66" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: 
"checkpoint_initializer_67/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_67/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_11/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_67/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_67" + op: "RestoreV2" + input: "checkpoint_initializer_67/prefix" + input: "checkpoint_initializer_67/tensor_names" + input: "checkpoint_initializer_67/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_67" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/bias" + input: "checkpoint_initializer_67" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_68/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_68/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_11/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_68/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_68" + op: "RestoreV2" + input: "checkpoint_initializer_68/prefix" + input: "checkpoint_initializer_68/tensor_names" + input: 
"checkpoint_initializer_68/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_68" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/kernel" + input: "checkpoint_initializer_68" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_69/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_69/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_69/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_69" + op: "RestoreV2" + input: "checkpoint_initializer_69/prefix" + input: "checkpoint_initializer_69/tensor_names" + input: "checkpoint_initializer_69/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_69" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta" + input: "checkpoint_initializer_69" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_70/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_70/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value 
{ + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_70/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_70" + op: "RestoreV2" + input: "checkpoint_initializer_70/prefix" + input: "checkpoint_initializer_70/tensor_names" + input: "checkpoint_initializer_70/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_70" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma" + input: "checkpoint_initializer_70" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_71/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_71/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_2/attention/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_71/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_71" + op: "RestoreV2" + input: "checkpoint_initializer_71/prefix" + input: "checkpoint_initializer_71/tensor_names" + input: "checkpoint_initializer_71/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_71" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/bias" + input: "checkpoint_initializer_71" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { 
+ list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_72/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_72/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_2/attention/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_72/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_72" + op: "RestoreV2" + input: "checkpoint_initializer_72/prefix" + input: "checkpoint_initializer_72/tensor_names" + input: "checkpoint_initializer_72/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_72" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/kernel" + input: "checkpoint_initializer_72" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_73/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_73/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_2/attention/self/key/bias" + } + } + } +} +node { + name: "checkpoint_initializer_73/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { 
+ size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_73" + op: "RestoreV2" + input: "checkpoint_initializer_73/prefix" + input: "checkpoint_initializer_73/tensor_names" + input: "checkpoint_initializer_73/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_73" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/bias" + input: "checkpoint_initializer_73" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_74/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_74/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_2/attention/self/key/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_74/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_74" + op: "RestoreV2" + input: "checkpoint_initializer_74/prefix" + input: "checkpoint_initializer_74/tensor_names" + input: "checkpoint_initializer_74/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_74" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/kernel" + input: "checkpoint_initializer_74" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_75/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { 
+ } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_75/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_2/attention/self/query/bias" + } + } + } +} +node { + name: "checkpoint_initializer_75/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_75" + op: "RestoreV2" + input: "checkpoint_initializer_75/prefix" + input: "checkpoint_initializer_75/tensor_names" + input: "checkpoint_initializer_75/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_75" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/bias" + input: "checkpoint_initializer_75" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_76/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_76/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_2/attention/self/query/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_76/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_76" + op: "RestoreV2" + input: "checkpoint_initializer_76/prefix" + input: "checkpoint_initializer_76/tensor_names" + input: "checkpoint_initializer_76/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list 
{ + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_76" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/kernel" + input: "checkpoint_initializer_76" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_77/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_77/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_2/attention/self/value/bias" + } + } + } +} +node { + name: "checkpoint_initializer_77/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_77" + op: "RestoreV2" + input: "checkpoint_initializer_77/prefix" + input: "checkpoint_initializer_77/tensor_names" + input: "checkpoint_initializer_77/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_77" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/bias" + input: "checkpoint_initializer_77" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_78/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_78/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr 
{ + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_2/attention/self/value/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_78/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_78" + op: "RestoreV2" + input: "checkpoint_initializer_78/prefix" + input: "checkpoint_initializer_78/tensor_names" + input: "checkpoint_initializer_78/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_78" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/kernel" + input: "checkpoint_initializer_78" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_79/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_79/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_2/intermediate/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_79/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_79" + op: "RestoreV2" + input: "checkpoint_initializer_79/prefix" + input: "checkpoint_initializer_79/tensor_names" + input: "checkpoint_initializer_79/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_79" + op: "Assign" + input: "bert/encoder/layer_2/intermediate/dense/bias" + input: "checkpoint_initializer_79" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_80/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_80/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_2/intermediate/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_80/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_80" + op: "RestoreV2" + input: "checkpoint_initializer_80/prefix" + input: "checkpoint_initializer_80/tensor_names" + input: "checkpoint_initializer_80/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_80" + op: "Assign" + input: "bert/encoder/layer_2/intermediate/dense/kernel" + input: "checkpoint_initializer_80" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_81/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_81/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_2/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_81/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: 
"value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_81" + op: "RestoreV2" + input: "checkpoint_initializer_81/prefix" + input: "checkpoint_initializer_81/tensor_names" + input: "checkpoint_initializer_81/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_81" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/beta" + input: "checkpoint_initializer_81" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_82/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_82/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_2/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_82/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_82" + op: "RestoreV2" + input: "checkpoint_initializer_82/prefix" + input: "checkpoint_initializer_82/tensor_names" + input: "checkpoint_initializer_82/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_82" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/gamma" + input: "checkpoint_initializer_82" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_83/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + 
string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_83/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_2/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_83/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_83" + op: "RestoreV2" + input: "checkpoint_initializer_83/prefix" + input: "checkpoint_initializer_83/tensor_names" + input: "checkpoint_initializer_83/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_83" + op: "Assign" + input: "bert/encoder/layer_2/output/dense/bias" + input: "checkpoint_initializer_83" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_84/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_84/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_2/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_84/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_84" + op: "RestoreV2" + input: "checkpoint_initializer_84/prefix" + input: "checkpoint_initializer_84/tensor_names" + input: "checkpoint_initializer_84/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_84" + op: 
"Assign" + input: "bert/encoder/layer_2/output/dense/kernel" + input: "checkpoint_initializer_84" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_85/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_85/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_85/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_85" + op: "RestoreV2" + input: "checkpoint_initializer_85/prefix" + input: "checkpoint_initializer_85/tensor_names" + input: "checkpoint_initializer_85/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_85" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta" + input: "checkpoint_initializer_85" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_86/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_86/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/gamma" + } + } + } 
+} +node { + name: "checkpoint_initializer_86/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_86" + op: "RestoreV2" + input: "checkpoint_initializer_86/prefix" + input: "checkpoint_initializer_86/tensor_names" + input: "checkpoint_initializer_86/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_86" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma" + input: "checkpoint_initializer_86" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_87/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_87/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_3/attention/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_87/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_87" + op: "RestoreV2" + input: "checkpoint_initializer_87/prefix" + input: "checkpoint_initializer_87/tensor_names" + input: "checkpoint_initializer_87/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_87" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/bias" + input: "checkpoint_initializer_87" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} 
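# The repeating checkpoint_initializer_N / Assign_N node pairs in this graph are
# what TensorFlow 1.x emits for tf.train.init_from_checkpoint(): each RestoreV2
# reads a single tensor from ./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt,
# and the paired Assign copies it into the bert/... graph variable of the same
# name. A minimal Python sketch of the kind of call assumed to produce these
# nodes (the assignment-map construction below is illustrative, not taken from
# this repository):
#
#     import tensorflow as tf  # TF 1.x API
#
#     init_checkpoint = "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"
#     tvars = tf.trainable_variables()
#     # Restore every bert/* variable from the checkpoint entry of the same name.
#     assignment_map = {
#         v.name.split(":")[0]: v for v in tvars if v.name.startswith("bert/")
#     }
#     # Emits one RestoreV2 ("checkpoint_initializer_N") plus one Assign per variable.
#     tf.train.init_from_checkpoint(init_checkpoint, assignment_map)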
+node { + name: "checkpoint_initializer_88/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_88/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_3/attention/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_88/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_88" + op: "RestoreV2" + input: "checkpoint_initializer_88/prefix" + input: "checkpoint_initializer_88/tensor_names" + input: "checkpoint_initializer_88/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_88" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/kernel" + input: "checkpoint_initializer_88" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_89/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_89/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_3/attention/self/key/bias" + } + } + } +} +node { + name: "checkpoint_initializer_89/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_89" + op: "RestoreV2" + input: 
"checkpoint_initializer_89/prefix" + input: "checkpoint_initializer_89/tensor_names" + input: "checkpoint_initializer_89/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_89" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/key/bias" + input: "checkpoint_initializer_89" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_90/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_90/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_3/attention/self/key/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_90/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_90" + op: "RestoreV2" + input: "checkpoint_initializer_90/prefix" + input: "checkpoint_initializer_90/tensor_names" + input: "checkpoint_initializer_90/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_90" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/key/kernel" + input: "checkpoint_initializer_90" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_91/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_91/tensor_names" 
+ op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_3/attention/self/query/bias" + } + } + } +} +node { + name: "checkpoint_initializer_91/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_91" + op: "RestoreV2" + input: "checkpoint_initializer_91/prefix" + input: "checkpoint_initializer_91/tensor_names" + input: "checkpoint_initializer_91/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_91" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/query/bias" + input: "checkpoint_initializer_91" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_92/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_92/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_3/attention/self/query/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_92/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_92" + op: "RestoreV2" + input: "checkpoint_initializer_92/prefix" + input: "checkpoint_initializer_92/tensor_names" + input: "checkpoint_initializer_92/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_92" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/query/kernel" + input: "checkpoint_initializer_92" + attr 
{ + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_93/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_93/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_3/attention/self/value/bias" + } + } + } +} +node { + name: "checkpoint_initializer_93/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_93" + op: "RestoreV2" + input: "checkpoint_initializer_93/prefix" + input: "checkpoint_initializer_93/tensor_names" + input: "checkpoint_initializer_93/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_93" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/value/bias" + input: "checkpoint_initializer_93" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_94/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_94/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_3/attention/self/value/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_94/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_94" + op: "RestoreV2" + input: "checkpoint_initializer_94/prefix" + input: "checkpoint_initializer_94/tensor_names" + input: "checkpoint_initializer_94/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_94" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/value/kernel" + input: "checkpoint_initializer_94" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_95/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_95/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_3/intermediate/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_95/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_95" + op: "RestoreV2" + input: "checkpoint_initializer_95/prefix" + input: "checkpoint_initializer_95/tensor_names" + input: "checkpoint_initializer_95/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_95" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/bias" + input: "checkpoint_initializer_95" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_96/prefix" + op: "Const" + device: "/device:CPU:0" + 
attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_96/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_3/intermediate/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_96/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_96" + op: "RestoreV2" + input: "checkpoint_initializer_96/prefix" + input: "checkpoint_initializer_96/tensor_names" + input: "checkpoint_initializer_96/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_96" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/kernel" + input: "checkpoint_initializer_96" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_97/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_97/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_3/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_97/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_97" + op: "RestoreV2" + input: "checkpoint_initializer_97/prefix" + input: "checkpoint_initializer_97/tensor_names" + input: 
"checkpoint_initializer_97/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_97" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/beta" + input: "checkpoint_initializer_97" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_98/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_98/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_3/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_98/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_98" + op: "RestoreV2" + input: "checkpoint_initializer_98/prefix" + input: "checkpoint_initializer_98/tensor_names" + input: "checkpoint_initializer_98/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_98" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/gamma" + input: "checkpoint_initializer_98" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_99/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_99/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: 
"dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_3/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_99/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_99" + op: "RestoreV2" + input: "checkpoint_initializer_99/prefix" + input: "checkpoint_initializer_99/tensor_names" + input: "checkpoint_initializer_99/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_99" + op: "Assign" + input: "bert/encoder/layer_3/output/dense/bias" + input: "checkpoint_initializer_99" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_100/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_100/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_3/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_100/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_100" + op: "RestoreV2" + input: "checkpoint_initializer_100/prefix" + input: "checkpoint_initializer_100/tensor_names" + input: "checkpoint_initializer_100/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_100" + op: "Assign" + input: "bert/encoder/layer_3/output/dense/kernel" + input: "checkpoint_initializer_100" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_101/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_101/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_101/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_101" + op: "RestoreV2" + input: "checkpoint_initializer_101/prefix" + input: "checkpoint_initializer_101/tensor_names" + input: "checkpoint_initializer_101/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_101" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta" + input: "checkpoint_initializer_101" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_102/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_102/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_102/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + 
attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_102" + op: "RestoreV2" + input: "checkpoint_initializer_102/prefix" + input: "checkpoint_initializer_102/tensor_names" + input: "checkpoint_initializer_102/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_102" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma" + input: "checkpoint_initializer_102" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_103/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_103/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_4/attention/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_103/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_103" + op: "RestoreV2" + input: "checkpoint_initializer_103/prefix" + input: "checkpoint_initializer_103/tensor_names" + input: "checkpoint_initializer_103/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_103" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/bias" + input: "checkpoint_initializer_103" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_104/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" 
+ value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_104/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_4/attention/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_104/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_104" + op: "RestoreV2" + input: "checkpoint_initializer_104/prefix" + input: "checkpoint_initializer_104/tensor_names" + input: "checkpoint_initializer_104/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_104" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/kernel" + input: "checkpoint_initializer_104" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_105/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_105/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_4/attention/self/key/bias" + } + } + } +} +node { + name: "checkpoint_initializer_105/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_105" + op: "RestoreV2" + input: "checkpoint_initializer_105/prefix" + input: "checkpoint_initializer_105/tensor_names" + input: "checkpoint_initializer_105/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 
768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_105" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/bias" + input: "checkpoint_initializer_105" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_106/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_106/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_4/attention/self/key/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_106/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_106" + op: "RestoreV2" + input: "checkpoint_initializer_106/prefix" + input: "checkpoint_initializer_106/tensor_names" + input: "checkpoint_initializer_106/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_106" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/kernel" + input: "checkpoint_initializer_106" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_107/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_107/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + 
value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_4/attention/self/query/bias" + } + } + } +} +node { + name: "checkpoint_initializer_107/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_107" + op: "RestoreV2" + input: "checkpoint_initializer_107/prefix" + input: "checkpoint_initializer_107/tensor_names" + input: "checkpoint_initializer_107/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_107" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/bias" + input: "checkpoint_initializer_107" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_108/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_108/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_4/attention/self/query/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_108/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_108" + op: "RestoreV2" + input: "checkpoint_initializer_108/prefix" + input: "checkpoint_initializer_108/tensor_names" + input: "checkpoint_initializer_108/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_108" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/kernel" + input: "checkpoint_initializer_108" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + 
list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_109/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_109/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_4/attention/self/value/bias" + } + } + } +} +node { + name: "checkpoint_initializer_109/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_109" + op: "RestoreV2" + input: "checkpoint_initializer_109/prefix" + input: "checkpoint_initializer_109/tensor_names" + input: "checkpoint_initializer_109/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_109" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/bias" + input: "checkpoint_initializer_109" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_110/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_110/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_4/attention/self/value/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_110/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: 
DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_110" + op: "RestoreV2" + input: "checkpoint_initializer_110/prefix" + input: "checkpoint_initializer_110/tensor_names" + input: "checkpoint_initializer_110/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_110" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/kernel" + input: "checkpoint_initializer_110" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_111/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_111/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_4/intermediate/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_111/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_111" + op: "RestoreV2" + input: "checkpoint_initializer_111/prefix" + input: "checkpoint_initializer_111/tensor_names" + input: "checkpoint_initializer_111/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_111" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/bias" + input: "checkpoint_initializer_111" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_112/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + 
dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_112/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_4/intermediate/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_112/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_112" + op: "RestoreV2" + input: "checkpoint_initializer_112/prefix" + input: "checkpoint_initializer_112/tensor_names" + input: "checkpoint_initializer_112/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_112" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/kernel" + input: "checkpoint_initializer_112" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_113/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_113/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_4/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_113/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_113" + op: "RestoreV2" + input: "checkpoint_initializer_113/prefix" + input: "checkpoint_initializer_113/tensor_names" + input: "checkpoint_initializer_113/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + 
key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_113" + op: "Assign" + input: "bert/encoder/layer_4/output/LayerNorm/beta" + input: "checkpoint_initializer_113" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_114/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_114/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_4/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_114/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_114" + op: "RestoreV2" + input: "checkpoint_initializer_114/prefix" + input: "checkpoint_initializer_114/tensor_names" + input: "checkpoint_initializer_114/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_114" + op: "Assign" + input: "bert/encoder/layer_4/output/LayerNorm/gamma" + input: "checkpoint_initializer_114" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_115/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_115/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: 
"bert/encoder/layer_4/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_115/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_115" + op: "RestoreV2" + input: "checkpoint_initializer_115/prefix" + input: "checkpoint_initializer_115/tensor_names" + input: "checkpoint_initializer_115/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_115" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/bias" + input: "checkpoint_initializer_115" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_116/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_116/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_4/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_116/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_116" + op: "RestoreV2" + input: "checkpoint_initializer_116/prefix" + input: "checkpoint_initializer_116/tensor_names" + input: "checkpoint_initializer_116/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_116" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/kernel" + input: "checkpoint_initializer_116" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + 
attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_117/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_117/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_117/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_117" + op: "RestoreV2" + input: "checkpoint_initializer_117/prefix" + input: "checkpoint_initializer_117/tensor_names" + input: "checkpoint_initializer_117/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_117" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta" + input: "checkpoint_initializer_117" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_118/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_118/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_118/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: 
"checkpoint_initializer_118" + op: "RestoreV2" + input: "checkpoint_initializer_118/prefix" + input: "checkpoint_initializer_118/tensor_names" + input: "checkpoint_initializer_118/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_118" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma" + input: "checkpoint_initializer_118" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_119/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_119/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_5/attention/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_119/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_119" + op: "RestoreV2" + input: "checkpoint_initializer_119/prefix" + input: "checkpoint_initializer_119/tensor_names" + input: "checkpoint_initializer_119/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_119" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/bias" + input: "checkpoint_initializer_119" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_120/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + 
name: "checkpoint_initializer_120/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_5/attention/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_120/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_120" + op: "RestoreV2" + input: "checkpoint_initializer_120/prefix" + input: "checkpoint_initializer_120/tensor_names" + input: "checkpoint_initializer_120/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_120" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/kernel" + input: "checkpoint_initializer_120" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_121/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_121/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_5/attention/self/key/bias" + } + } + } +} +node { + name: "checkpoint_initializer_121/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_121" + op: "RestoreV2" + input: "checkpoint_initializer_121/prefix" + input: "checkpoint_initializer_121/tensor_names" + input: "checkpoint_initializer_121/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_121" + op: "Assign" + input: 
"bert/encoder/layer_5/attention/self/key/bias" + input: "checkpoint_initializer_121" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_122/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_122/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_5/attention/self/key/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_122/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_122" + op: "RestoreV2" + input: "checkpoint_initializer_122/prefix" + input: "checkpoint_initializer_122/tensor_names" + input: "checkpoint_initializer_122/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_122" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/key/kernel" + input: "checkpoint_initializer_122" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_123/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_123/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_5/attention/self/query/bias" + } + } + } 
+} +node { + name: "checkpoint_initializer_123/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_123" + op: "RestoreV2" + input: "checkpoint_initializer_123/prefix" + input: "checkpoint_initializer_123/tensor_names" + input: "checkpoint_initializer_123/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_123" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/bias" + input: "checkpoint_initializer_123" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_124/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_124/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_5/attention/self/query/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_124/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_124" + op: "RestoreV2" + input: "checkpoint_initializer_124/prefix" + input: "checkpoint_initializer_124/tensor_names" + input: "checkpoint_initializer_124/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_124" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/kernel" + input: "checkpoint_initializer_124" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: 
"validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_125/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_125/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_5/attention/self/value/bias" + } + } + } +} +node { + name: "checkpoint_initializer_125/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_125" + op: "RestoreV2" + input: "checkpoint_initializer_125/prefix" + input: "checkpoint_initializer_125/tensor_names" + input: "checkpoint_initializer_125/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_125" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/bias" + input: "checkpoint_initializer_125" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_126/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_126/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_5/attention/self/value/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_126/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_126" + op: "RestoreV2" + input: 
"checkpoint_initializer_126/prefix" + input: "checkpoint_initializer_126/tensor_names" + input: "checkpoint_initializer_126/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_126" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/kernel" + input: "checkpoint_initializer_126" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_127/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_127/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_5/intermediate/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_127/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_127" + op: "RestoreV2" + input: "checkpoint_initializer_127/prefix" + input: "checkpoint_initializer_127/tensor_names" + input: "checkpoint_initializer_127/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_127" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/bias" + input: "checkpoint_initializer_127" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_128/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: 
"checkpoint_initializer_128/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_5/intermediate/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_128/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_128" + op: "RestoreV2" + input: "checkpoint_initializer_128/prefix" + input: "checkpoint_initializer_128/tensor_names" + input: "checkpoint_initializer_128/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_128" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/kernel" + input: "checkpoint_initializer_128" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_129/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_129/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_5/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_129/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_129" + op: "RestoreV2" + input: "checkpoint_initializer_129/prefix" + input: "checkpoint_initializer_129/tensor_names" + input: "checkpoint_initializer_129/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_129" + op: "Assign" + input: 
"bert/encoder/layer_5/output/LayerNorm/beta" + input: "checkpoint_initializer_129" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_130/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_130/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_5/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_130/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_130" + op: "RestoreV2" + input: "checkpoint_initializer_130/prefix" + input: "checkpoint_initializer_130/tensor_names" + input: "checkpoint_initializer_130/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_130" + op: "Assign" + input: "bert/encoder/layer_5/output/LayerNorm/gamma" + input: "checkpoint_initializer_130" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_131/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_131/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_5/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_131/shape_and_slices" + op: 
"Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_131" + op: "RestoreV2" + input: "checkpoint_initializer_131/prefix" + input: "checkpoint_initializer_131/tensor_names" + input: "checkpoint_initializer_131/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_131" + op: "Assign" + input: "bert/encoder/layer_5/output/dense/bias" + input: "checkpoint_initializer_131" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_132/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_132/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_5/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_132/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_132" + op: "RestoreV2" + input: "checkpoint_initializer_132/prefix" + input: "checkpoint_initializer_132/tensor_names" + input: "checkpoint_initializer_132/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_132" + op: "Assign" + input: "bert/encoder/layer_5/output/dense/kernel" + input: "checkpoint_initializer_132" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_133/prefix" + op: 
"Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_133/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_133/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_133" + op: "RestoreV2" + input: "checkpoint_initializer_133/prefix" + input: "checkpoint_initializer_133/tensor_names" + input: "checkpoint_initializer_133/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_133" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta" + input: "checkpoint_initializer_133" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_134/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_134/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_134/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_134" + op: "RestoreV2" + input: "checkpoint_initializer_134/prefix" + input: 
"checkpoint_initializer_134/tensor_names" + input: "checkpoint_initializer_134/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_134" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma" + input: "checkpoint_initializer_134" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_135/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_135/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_6/attention/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_135/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_135" + op: "RestoreV2" + input: "checkpoint_initializer_135/prefix" + input: "checkpoint_initializer_135/tensor_names" + input: "checkpoint_initializer_135/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_135" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/bias" + input: "checkpoint_initializer_135" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_136/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_136/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + 
key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_6/attention/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_136/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_136" + op: "RestoreV2" + input: "checkpoint_initializer_136/prefix" + input: "checkpoint_initializer_136/tensor_names" + input: "checkpoint_initializer_136/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_136" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/kernel" + input: "checkpoint_initializer_136" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_137/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_137/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_6/attention/self/key/bias" + } + } + } +} +node { + name: "checkpoint_initializer_137/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_137" + op: "RestoreV2" + input: "checkpoint_initializer_137/prefix" + input: "checkpoint_initializer_137/tensor_names" + input: "checkpoint_initializer_137/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_137" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/bias" + input: "checkpoint_initializer_137" + attr { + 
key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_138/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_138/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_6/attention/self/key/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_138/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_138" + op: "RestoreV2" + input: "checkpoint_initializer_138/prefix" + input: "checkpoint_initializer_138/tensor_names" + input: "checkpoint_initializer_138/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_138" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/kernel" + input: "checkpoint_initializer_138" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_139/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_139/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_6/attention/self/query/bias" + } + } + } +} +node { + name: "checkpoint_initializer_139/shape_and_slices" + op: "Const" + device: 
"/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_139" + op: "RestoreV2" + input: "checkpoint_initializer_139/prefix" + input: "checkpoint_initializer_139/tensor_names" + input: "checkpoint_initializer_139/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_139" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/bias" + input: "checkpoint_initializer_139" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_140/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_140/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_6/attention/self/query/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_140/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_140" + op: "RestoreV2" + input: "checkpoint_initializer_140/prefix" + input: "checkpoint_initializer_140/tensor_names" + input: "checkpoint_initializer_140/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_140" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/kernel" + input: "checkpoint_initializer_140" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: 
"checkpoint_initializer_141/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_141/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_6/attention/self/value/bias" + } + } + } +} +node { + name: "checkpoint_initializer_141/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_141" + op: "RestoreV2" + input: "checkpoint_initializer_141/prefix" + input: "checkpoint_initializer_141/tensor_names" + input: "checkpoint_initializer_141/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_141" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/value/bias" + input: "checkpoint_initializer_141" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_142/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_142/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_6/attention/self/value/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_142/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_142" + op: "RestoreV2" + input: "checkpoint_initializer_142/prefix" + input: 
"checkpoint_initializer_142/tensor_names" + input: "checkpoint_initializer_142/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_142" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/value/kernel" + input: "checkpoint_initializer_142" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_143/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_143/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_6/intermediate/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_143/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_143" + op: "RestoreV2" + input: "checkpoint_initializer_143/prefix" + input: "checkpoint_initializer_143/tensor_names" + input: "checkpoint_initializer_143/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_143" + op: "Assign" + input: "bert/encoder/layer_6/intermediate/dense/bias" + input: "checkpoint_initializer_143" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_144/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_144/tensor_names" + op: "Const" + device: 
"/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_6/intermediate/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_144/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_144" + op: "RestoreV2" + input: "checkpoint_initializer_144/prefix" + input: "checkpoint_initializer_144/tensor_names" + input: "checkpoint_initializer_144/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_144" + op: "Assign" + input: "bert/encoder/layer_6/intermediate/dense/kernel" + input: "checkpoint_initializer_144" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_145/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_145/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_6/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_145/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_145" + op: "RestoreV2" + input: "checkpoint_initializer_145/prefix" + input: "checkpoint_initializer_145/tensor_names" + input: "checkpoint_initializer_145/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_145" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/beta" + input: "checkpoint_initializer_145" 
+ attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_146/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_146/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_6/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_146/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_146" + op: "RestoreV2" + input: "checkpoint_initializer_146/prefix" + input: "checkpoint_initializer_146/tensor_names" + input: "checkpoint_initializer_146/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_146" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/gamma" + input: "checkpoint_initializer_146" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_147/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_147/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_6/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_147/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + 
list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_147" + op: "RestoreV2" + input: "checkpoint_initializer_147/prefix" + input: "checkpoint_initializer_147/tensor_names" + input: "checkpoint_initializer_147/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_147" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/bias" + input: "checkpoint_initializer_147" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_148/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_148/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_6/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_148/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_148" + op: "RestoreV2" + input: "checkpoint_initializer_148/prefix" + input: "checkpoint_initializer_148/tensor_names" + input: "checkpoint_initializer_148/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_148" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/kernel" + input: "checkpoint_initializer_148" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_149/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + 
list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_149/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_149/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_149" + op: "RestoreV2" + input: "checkpoint_initializer_149/prefix" + input: "checkpoint_initializer_149/tensor_names" + input: "checkpoint_initializer_149/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_149" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta" + input: "checkpoint_initializer_149" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_150/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_150/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_150/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_150" + op: "RestoreV2" + input: "checkpoint_initializer_150/prefix" + input: "checkpoint_initializer_150/tensor_names" + input: "checkpoint_initializer_150/shape_and_slices" + device: 
"/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_150" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma" + input: "checkpoint_initializer_150" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_151/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_151/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_7/attention/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_151/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_151" + op: "RestoreV2" + input: "checkpoint_initializer_151/prefix" + input: "checkpoint_initializer_151/tensor_names" + input: "checkpoint_initializer_151/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_151" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/bias" + input: "checkpoint_initializer_151" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_152/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_152/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: 
"dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_7/attention/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_152/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_152" + op: "RestoreV2" + input: "checkpoint_initializer_152/prefix" + input: "checkpoint_initializer_152/tensor_names" + input: "checkpoint_initializer_152/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_152" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/kernel" + input: "checkpoint_initializer_152" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_153/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_153/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_7/attention/self/key/bias" + } + } + } +} +node { + name: "checkpoint_initializer_153/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_153" + op: "RestoreV2" + input: "checkpoint_initializer_153/prefix" + input: "checkpoint_initializer_153/tensor_names" + input: "checkpoint_initializer_153/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_153" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/bias" + input: "checkpoint_initializer_153" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_7/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_154/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_154/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_7/attention/self/key/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_154/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_154" + op: "RestoreV2" + input: "checkpoint_initializer_154/prefix" + input: "checkpoint_initializer_154/tensor_names" + input: "checkpoint_initializer_154/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_154" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/kernel" + input: "checkpoint_initializer_154" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_155/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_155/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_7/attention/self/query/bias" + } + } + } +} +node { + name: "checkpoint_initializer_155/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + 
} + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_155" + op: "RestoreV2" + input: "checkpoint_initializer_155/prefix" + input: "checkpoint_initializer_155/tensor_names" + input: "checkpoint_initializer_155/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_155" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/bias" + input: "checkpoint_initializer_155" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_156/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_156/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_7/attention/self/query/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_156/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_156" + op: "RestoreV2" + input: "checkpoint_initializer_156/prefix" + input: "checkpoint_initializer_156/tensor_names" + input: "checkpoint_initializer_156/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_156" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/kernel" + input: "checkpoint_initializer_156" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_157/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { 
+ shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_157/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_7/attention/self/value/bias" + } + } + } +} +node { + name: "checkpoint_initializer_157/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_157" + op: "RestoreV2" + input: "checkpoint_initializer_157/prefix" + input: "checkpoint_initializer_157/tensor_names" + input: "checkpoint_initializer_157/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_157" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/bias" + input: "checkpoint_initializer_157" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_158/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_158/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_7/attention/self/value/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_158/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_158" + op: "RestoreV2" + input: "checkpoint_initializer_158/prefix" + input: "checkpoint_initializer_158/tensor_names" + input: "checkpoint_initializer_158/shape_and_slices" + device: "/device:CPU:0" + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_158" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/kernel" + input: "checkpoint_initializer_158" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_159/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_159/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_7/intermediate/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_159/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_159" + op: "RestoreV2" + input: "checkpoint_initializer_159/prefix" + input: "checkpoint_initializer_159/tensor_names" + input: "checkpoint_initializer_159/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_159" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/bias" + input: "checkpoint_initializer_159" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_160/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_160/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" 
+ value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_7/intermediate/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_160/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_160" + op: "RestoreV2" + input: "checkpoint_initializer_160/prefix" + input: "checkpoint_initializer_160/tensor_names" + input: "checkpoint_initializer_160/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_160" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/kernel" + input: "checkpoint_initializer_160" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_161/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_161/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_7/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_161/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_161" + op: "RestoreV2" + input: "checkpoint_initializer_161/prefix" + input: "checkpoint_initializer_161/tensor_names" + input: "checkpoint_initializer_161/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_161" + op: "Assign" + input: "bert/encoder/layer_7/output/LayerNorm/beta" + input: "checkpoint_initializer_161" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_7/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_162/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_162/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_7/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_162/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_162" + op: "RestoreV2" + input: "checkpoint_initializer_162/prefix" + input: "checkpoint_initializer_162/tensor_names" + input: "checkpoint_initializer_162/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_162" + op: "Assign" + input: "bert/encoder/layer_7/output/LayerNorm/gamma" + input: "checkpoint_initializer_162" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_163/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_163/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_7/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_163/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING 
+ } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_163" + op: "RestoreV2" + input: "checkpoint_initializer_163/prefix" + input: "checkpoint_initializer_163/tensor_names" + input: "checkpoint_initializer_163/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_163" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/bias" + input: "checkpoint_initializer_163" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_164/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_164/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_7/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_164/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_164" + op: "RestoreV2" + input: "checkpoint_initializer_164/prefix" + input: "checkpoint_initializer_164/tensor_names" + input: "checkpoint_initializer_164/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_164" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/kernel" + input: "checkpoint_initializer_164" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_165/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: 
"value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_165/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_165/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_165" + op: "RestoreV2" + input: "checkpoint_initializer_165/prefix" + input: "checkpoint_initializer_165/tensor_names" + input: "checkpoint_initializer_165/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_165" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta" + input: "checkpoint_initializer_165" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_166/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_166/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_166/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_166" + op: "RestoreV2" + input: "checkpoint_initializer_166/prefix" + input: "checkpoint_initializer_166/tensor_names" + input: "checkpoint_initializer_166/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + 
attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_166" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma" + input: "checkpoint_initializer_166" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_167/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_167/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_8/attention/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_167/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_167" + op: "RestoreV2" + input: "checkpoint_initializer_167/prefix" + input: "checkpoint_initializer_167/tensor_names" + input: "checkpoint_initializer_167/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_167" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/bias" + input: "checkpoint_initializer_167" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_168/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_168/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { 
+ dim { + size: 1 + } + } + string_val: "bert/encoder/layer_8/attention/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_168/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_168" + op: "RestoreV2" + input: "checkpoint_initializer_168/prefix" + input: "checkpoint_initializer_168/tensor_names" + input: "checkpoint_initializer_168/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_168" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/kernel" + input: "checkpoint_initializer_168" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_169/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_169/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_8/attention/self/key/bias" + } + } + } +} +node { + name: "checkpoint_initializer_169/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_169" + op: "RestoreV2" + input: "checkpoint_initializer_169/prefix" + input: "checkpoint_initializer_169/tensor_names" + input: "checkpoint_initializer_169/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_169" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/bias" + input: "checkpoint_initializer_169" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + 
size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_170/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_170/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_8/attention/self/key/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_170/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_170" + op: "RestoreV2" + input: "checkpoint_initializer_170/prefix" + input: "checkpoint_initializer_170/tensor_names" + input: "checkpoint_initializer_170/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_170" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/kernel" + input: "checkpoint_initializer_170" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_171/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_171/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_8/attention/self/query/bias" + } + } + } +} +node { + name: "checkpoint_initializer_171/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: 
DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_171" + op: "RestoreV2" + input: "checkpoint_initializer_171/prefix" + input: "checkpoint_initializer_171/tensor_names" + input: "checkpoint_initializer_171/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_171" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/bias" + input: "checkpoint_initializer_171" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_172/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_172/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_8/attention/self/query/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_172/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_172" + op: "RestoreV2" + input: "checkpoint_initializer_172/prefix" + input: "checkpoint_initializer_172/tensor_names" + input: "checkpoint_initializer_172/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_172" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/kernel" + input: "checkpoint_initializer_172" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_173/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + 
tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_173/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_8/attention/self/value/bias" + } + } + } +} +node { + name: "checkpoint_initializer_173/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_173" + op: "RestoreV2" + input: "checkpoint_initializer_173/prefix" + input: "checkpoint_initializer_173/tensor_names" + input: "checkpoint_initializer_173/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_173" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/value/bias" + input: "checkpoint_initializer_173" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_174/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_174/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_8/attention/self/value/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_174/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_174" + op: "RestoreV2" + input: "checkpoint_initializer_174/prefix" + input: "checkpoint_initializer_174/tensor_names" + input: "checkpoint_initializer_174/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: 
"dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_174" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/value/kernel" + input: "checkpoint_initializer_174" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_175/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_175/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_8/intermediate/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_175/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_175" + op: "RestoreV2" + input: "checkpoint_initializer_175/prefix" + input: "checkpoint_initializer_175/tensor_names" + input: "checkpoint_initializer_175/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_175" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/bias" + input: "checkpoint_initializer_175" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_176/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_176/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + 
size: 1 + } + } + string_val: "bert/encoder/layer_8/intermediate/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_176/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_176" + op: "RestoreV2" + input: "checkpoint_initializer_176/prefix" + input: "checkpoint_initializer_176/tensor_names" + input: "checkpoint_initializer_176/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_176" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/kernel" + input: "checkpoint_initializer_176" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_177/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_177/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_8/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_177/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_177" + op: "RestoreV2" + input: "checkpoint_initializer_177/prefix" + input: "checkpoint_initializer_177/tensor_names" + input: "checkpoint_initializer_177/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_177" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/beta" + input: "checkpoint_initializer_177" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } 
+ attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_178/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_178/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_8/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_178/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_178" + op: "RestoreV2" + input: "checkpoint_initializer_178/prefix" + input: "checkpoint_initializer_178/tensor_names" + input: "checkpoint_initializer_178/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_178" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/gamma" + input: "checkpoint_initializer_178" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_179/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_179/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_8/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_179/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: 
"checkpoint_initializer_179" + op: "RestoreV2" + input: "checkpoint_initializer_179/prefix" + input: "checkpoint_initializer_179/tensor_names" + input: "checkpoint_initializer_179/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_179" + op: "Assign" + input: "bert/encoder/layer_8/output/dense/bias" + input: "checkpoint_initializer_179" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_180/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_180/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_8/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_180/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_180" + op: "RestoreV2" + input: "checkpoint_initializer_180/prefix" + input: "checkpoint_initializer_180/tensor_names" + input: "checkpoint_initializer_180/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_180" + op: "Assign" + input: "bert/encoder/layer_8/output/dense/kernel" + input: "checkpoint_initializer_180" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_181/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + 
name: "checkpoint_initializer_181/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_181/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_181" + op: "RestoreV2" + input: "checkpoint_initializer_181/prefix" + input: "checkpoint_initializer_181/tensor_names" + input: "checkpoint_initializer_181/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_181" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta" + input: "checkpoint_initializer_181" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_182/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_182/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_182/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_182" + op: "RestoreV2" + input: "checkpoint_initializer_182/prefix" + input: "checkpoint_initializer_182/tensor_names" + input: "checkpoint_initializer_182/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_182" + op: "Assign" + input: 
"bert/encoder/layer_9/attention/output/LayerNorm/gamma" + input: "checkpoint_initializer_182" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_183/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_183/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_9/attention/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_183/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_183" + op: "RestoreV2" + input: "checkpoint_initializer_183/prefix" + input: "checkpoint_initializer_183/tensor_names" + input: "checkpoint_initializer_183/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_183" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/dense/bias" + input: "checkpoint_initializer_183" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_184/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_184/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_9/attention/output/dense/kernel" + } + } + } +} +node { + name: 
"checkpoint_initializer_184/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_184" + op: "RestoreV2" + input: "checkpoint_initializer_184/prefix" + input: "checkpoint_initializer_184/tensor_names" + input: "checkpoint_initializer_184/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_184" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/dense/kernel" + input: "checkpoint_initializer_184" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_185/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_185/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_9/attention/self/key/bias" + } + } + } +} +node { + name: "checkpoint_initializer_185/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_185" + op: "RestoreV2" + input: "checkpoint_initializer_185/prefix" + input: "checkpoint_initializer_185/tensor_names" + input: "checkpoint_initializer_185/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_185" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/key/bias" + input: "checkpoint_initializer_185" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + 
value { + b: true + } + } +} +node { + name: "checkpoint_initializer_186/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_186/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_9/attention/self/key/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_186/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_186" + op: "RestoreV2" + input: "checkpoint_initializer_186/prefix" + input: "checkpoint_initializer_186/tensor_names" + input: "checkpoint_initializer_186/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_186" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/key/kernel" + input: "checkpoint_initializer_186" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_187/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_187/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_9/attention/self/query/bias" + } + } + } +} +node { + name: "checkpoint_initializer_187/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_187" 
+ op: "RestoreV2" + input: "checkpoint_initializer_187/prefix" + input: "checkpoint_initializer_187/tensor_names" + input: "checkpoint_initializer_187/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_187" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/bias" + input: "checkpoint_initializer_187" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_188/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_188/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_9/attention/self/query/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_188/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_188" + op: "RestoreV2" + input: "checkpoint_initializer_188/prefix" + input: "checkpoint_initializer_188/tensor_names" + input: "checkpoint_initializer_188/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_188" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/kernel" + input: "checkpoint_initializer_188" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_189/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node 
{ + name: "checkpoint_initializer_189/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_9/attention/self/value/bias" + } + } + } +} +node { + name: "checkpoint_initializer_189/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_189" + op: "RestoreV2" + input: "checkpoint_initializer_189/prefix" + input: "checkpoint_initializer_189/tensor_names" + input: "checkpoint_initializer_189/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_189" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/value/bias" + input: "checkpoint_initializer_189" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_190/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_190/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_9/attention/self/value/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_190/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_190" + op: "RestoreV2" + input: "checkpoint_initializer_190/prefix" + input: "checkpoint_initializer_190/tensor_names" + input: "checkpoint_initializer_190/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_190" + op: "Assign" + input: 
"bert/encoder/layer_9/attention/self/value/kernel" + input: "checkpoint_initializer_190" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_191/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_191/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_9/intermediate/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_191/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_191" + op: "RestoreV2" + input: "checkpoint_initializer_191/prefix" + input: "checkpoint_initializer_191/tensor_names" + input: "checkpoint_initializer_191/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_191" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/bias" + input: "checkpoint_initializer_191" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_192/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_192/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_9/intermediate/dense/kernel" + } + } + } +} +node { + name: 
"checkpoint_initializer_192/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_192" + op: "RestoreV2" + input: "checkpoint_initializer_192/prefix" + input: "checkpoint_initializer_192/tensor_names" + input: "checkpoint_initializer_192/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_192" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/kernel" + input: "checkpoint_initializer_192" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_193/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_193/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_9/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_193/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_193" + op: "RestoreV2" + input: "checkpoint_initializer_193/prefix" + input: "checkpoint_initializer_193/tensor_names" + input: "checkpoint_initializer_193/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_193" + op: "Assign" + input: "bert/encoder/layer_9/output/LayerNorm/beta" + input: "checkpoint_initializer_193" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: 
true + } + } +} +node { + name: "checkpoint_initializer_194/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_194/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_9/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_194/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_194" + op: "RestoreV2" + input: "checkpoint_initializer_194/prefix" + input: "checkpoint_initializer_194/tensor_names" + input: "checkpoint_initializer_194/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_194" + op: "Assign" + input: "bert/encoder/layer_9/output/LayerNorm/gamma" + input: "checkpoint_initializer_194" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_195/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_195/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_9/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_195/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_195" + op: "RestoreV2" + input: "checkpoint_initializer_195/prefix" + input: 
"checkpoint_initializer_195/tensor_names" + input: "checkpoint_initializer_195/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_195" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/bias" + input: "checkpoint_initializer_195" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_196/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_196/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_9/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_196/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_196" + op: "RestoreV2" + input: "checkpoint_initializer_196/prefix" + input: "checkpoint_initializer_196/tensor_names" + input: "checkpoint_initializer_196/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_196" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/kernel" + input: "checkpoint_initializer_196" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_197/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_197/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/pooler/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_197/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_197" + op: "RestoreV2" + input: "checkpoint_initializer_197/prefix" + input: "checkpoint_initializer_197/tensor_names" + input: "checkpoint_initializer_197/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_197" + op: "Assign" + input: "bert/pooler/dense/bias" + input: "checkpoint_initializer_197" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_198/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "./chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_198/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/pooler/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_198/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_198" + op: "RestoreV2" + input: "checkpoint_initializer_198/prefix" + input: "checkpoint_initializer_198/tensor_names" + input: "checkpoint_initializer_198/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_198" + op: "Assign" + input: "bert/pooler/dense/kernel" + input: "checkpoint_initializer_198" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Const_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.9999999494757503e-05 + } + } + } +} +node { + name: "PolynomialDecay/Cast/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "PolynomialDecay/Cast_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "PolynomialDecay/Cast_2/ReadVariableOp" + op: "ReadVariableOp" + input: "global_step" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } +} +node { + name: "PolynomialDecay/Cast_2" + op: "Cast" + input: "PolynomialDecay/Cast_2/ReadVariableOp" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT64 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "PolynomialDecay/Cast_3/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 14062 + } + } + } +} +node { + name: "PolynomialDecay/Cast_3" + op: "Cast" + input: "PolynomialDecay/Cast_3/x" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "PolynomialDecay/Minimum/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 14062.0 + } + } + } +} +node { + name: "PolynomialDecay/Minimum" + op: "Minimum" + input: "PolynomialDecay/Cast_2" + input: "PolynomialDecay/Minimum/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "PolynomialDecay/div" + op: "RealDiv" + input: "PolynomialDecay/Minimum" + input: "PolynomialDecay/Cast_3" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "PolynomialDecay/sub" + op: "Sub" + input: "Const_1" + input: "PolynomialDecay/Cast/x" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "PolynomialDecay/sub_1/x" + op: "Const" + attr 
{ + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "PolynomialDecay/sub_1" + op: "Sub" + input: "PolynomialDecay/sub_1/x" + input: "PolynomialDecay/div" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "PolynomialDecay/Pow" + op: "Pow" + input: "PolynomialDecay/sub_1" + input: "PolynomialDecay/Cast_1/x" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "PolynomialDecay/Mul" + op: "Mul" + input: "PolynomialDecay/sub" + input: "PolynomialDecay/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "PolynomialDecay" + op: "Add" + input: "PolynomialDecay/Mul" + input: "PolynomialDecay/Cast/x" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Cast_1/ReadVariableOp" + op: "ReadVariableOp" + input: "global_step" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } +} +node { + name: "Cast_1" + op: "Cast" + input: "Cast_1/ReadVariableOp" + attr { + key: "DstT" + value { + type: DT_INT32 + } + } + attr { + key: "SrcT" + value { + type: DT_INT64 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Const_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1406 + } + } + } +} +node { + name: "Cast_2" + op: "Cast" + input: "Cast_1" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Cast_3" + op: "Cast" + input: "Const_2" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "truediv" + op: "RealDiv" + input: "Cast_2" + input: "Cast_3" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.9999999494757503e-05 + } + } + } +} +node { + name: "mul" + op: "Mul" + input: "mul/x" + input: "truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Less" + op: "Less" + input: "Cast_1" + input: "Const_2" + attr { + key: "T" + value { + type: 
DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Cast_4" + op: "Cast" + input: "Less" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "sub" + op: "Sub" + input: "sub/x" + input: "Cast_4" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_1" + op: "Mul" + input: "sub" + input: "PolynomialDecay" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_2" + op: "Mul" + input: "Cast_4" + input: "mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "add" + op: "Add" + input: "mul_1" + input: "mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/grad_ys_0" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/Fill" + op: "Fill" + input: "gradients/Shape" + input: "gradients/grad_ys_0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/loss/Mean_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/loss/Mean_grad/Reshape" + op: "Reshape" + input: "gradients/Fill" + input: "gradients/loss/Mean_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/loss/Mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 32 + } + } + } +} +node { + name: 
"gradients/loss/Mean_grad/Tile" + op: "Tile" + input: "gradients/loss/Mean_grad/Reshape" + input: "gradients/loss/Mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + } + } + } + } +} +node { + name: "gradients/loss/Mean_grad/Const_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 32.0 + } + } + } +} +node { + name: "gradients/loss/Mean_grad/truediv" + op: "RealDiv" + input: "gradients/loss/Mean_grad/Tile" + input: "gradients/loss/Mean_grad/Const_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + } + } + } + } +} +node { + name: "gradients/loss/Neg_grad/Neg" + op: "Neg" + input: "gradients/loss/Mean_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + } + } + } + } +} +node { + name: "gradients/loss/Sum_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: " \000\000\000\003\000\000\000" + } + } + } +} +node { + name: "gradients/loss/Sum_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/loss/Sum_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/loss/Sum_grad/add" + op: "Add" + input: "loss/Sum/reduction_indices" + input: "gradients/loss/Sum_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/loss/Sum_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/loss/Sum_grad/mod" + op: "FloorMod" + input: "gradients/loss/Sum_grad/add" + input: "gradients/loss/Sum_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/loss/Sum_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/loss/Sum_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/loss/Sum_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/loss/Sum_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/loss/Sum_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + 
tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/loss/Sum_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/loss/Sum_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/loss/Sum_grad/range" + op: "Range" + input: "gradients/loss/Sum_grad/range/start" + input: "gradients/loss/Sum_grad/Size" + input: "gradients/loss/Sum_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/loss/Sum_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/loss/Sum_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/loss/Sum_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/loss/Sum_grad/Fill" + op: "Fill" + input: "gradients/loss/Sum_grad/Shape_1" + input: "gradients/loss/Sum_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/loss/Sum_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/loss/Sum_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/loss/Sum_grad/range" + input: "gradients/loss/Sum_grad/mod" + input: "gradients/loss/Sum_grad/Shape" + input: "gradients/loss/Sum_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/loss/Sum_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/loss/Sum_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/loss/Sum_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/loss/Sum_grad/Maximum" + op: "Maximum" + input: "gradients/loss/Sum_grad/DynamicStitch" + input: "gradients/loss/Sum_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/loss/Sum_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/loss/Sum_grad/floordiv" + op: "FloorDiv" + input: "gradients/loss/Sum_grad/Shape" + input: "gradients/loss/Sum_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/loss/Sum_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + 
size: 2 + } + } + } + } + } +} +node { + name: "gradients/loss/Sum_grad/Reshape" + op: "Reshape" + input: "gradients/loss/Neg_grad/Neg" + input: "gradients/loss/Sum_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/loss/Sum_grad/Tile" + op: "Tile" + input: "gradients/loss/Sum_grad/Reshape" + input: "gradients/loss/Sum_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/loss/mul_grad/Mul" + op: "Mul" + input: "gradients/loss/Sum_grad/Tile" + input: "loss/LogSoftmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/loss/mul_grad/Mul_1" + op: "Mul" + input: "gradients/loss/Sum_grad/Tile" + input: "loss/one_hot" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/loss/LogSoftmax_grad/Exp" + op: "Exp" + input: "loss/LogSoftmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/loss/LogSoftmax_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/loss/LogSoftmax_grad/Sum" + op: "Sum" + input: "gradients/loss/mul_grad/Mul_1" + input: "gradients/loss/LogSoftmax_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/loss/LogSoftmax_grad/mul" + op: "Mul" + input: "gradients/loss/LogSoftmax_grad/Sum" + input: "gradients/loss/LogSoftmax_grad/Exp" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/loss/LogSoftmax_grad/sub" + op: "Sub" + input: "gradients/loss/mul_grad/Mul_1" + input: "gradients/loss/LogSoftmax_grad/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/loss/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/loss/LogSoftmax_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: 
"gradients/loss/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/loss/LogSoftmax_grad/sub" + input: "output_weights/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/loss/MatMul_grad/MatMul_1" + op: "MatMul" + input: "gradients/loss/LogSoftmax_grad/sub" + input: "loss/dropout/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/loss/dropout/mul_1_grad/Mul" + op: "Mul" + input: "gradients/loss/MatMul_grad/MatMul" + input: "loss/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/loss/dropout/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/loss/MatMul_grad/MatMul" + input: "loss/dropout/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/loss/dropout/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: " \000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/loss/dropout/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/loss/dropout/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/loss/dropout/mul_grad/Shape" + input: "gradients/loss/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/loss/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/loss/dropout/mul_1_grad/Mul" + input: "loss/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/loss/dropout/mul_grad/Sum" + op: "Sum" + input: "gradients/loss/dropout/mul_grad/Mul" + input: "gradients/loss/dropout/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/loss/dropout/mul_grad/Reshape" + op: "Reshape" + input: 
"gradients/loss/dropout/mul_grad/Sum" + input: "gradients/loss/dropout/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/loss/dropout/mul_grad/Mul_1" + op: "Mul" + input: "bert/pooler/dense/Tanh" + input: "gradients/loss/dropout/mul_1_grad/Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/loss/dropout/mul_grad/Sum_1" + op: "Sum" + input: "gradients/loss/dropout/mul_grad/Mul_1" + input: "gradients/loss/dropout/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/loss/dropout/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/loss/dropout/mul_grad/Sum_1" + input: "gradients/loss/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/pooler/dense/Tanh_grad/TanhGrad" + op: "TanhGrad" + input: "bert/pooler/dense/Tanh" + input: "gradients/loss/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/pooler/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/pooler/dense/Tanh_grad/TanhGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/pooler/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/pooler/dense/Tanh_grad/TanhGrad" + input: "bert/pooler/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/pooler/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/pooler/Squeeze" + input: "gradients/bert/pooler/dense/Tanh_grad/TanhGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/pooler/Squeeze_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\001\000\000\000\000\003\000\000" + } + } + 
} +} +node { + name: "gradients/bert/pooler/Squeeze_grad/Reshape" + op: "Reshape" + input: "gradients/bert/pooler/dense/MatMul_grad/MatMul" + input: "gradients/bert/pooler/Squeeze_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/pooler/strided_slice_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/pooler/strided_slice_grad/StridedSliceGrad" + op: "StridedSliceGrad" + input: "gradients/bert/pooler/strided_slice_grad/Shape" + input: "bert/pooler/strided_slice/stack" + input: "bert/pooler/strided_slice/stack_1" + input: "bert/pooler/strided_slice/stack_2" + input: "gradients/bert/pooler/Squeeze_grad/Reshape" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 5 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 5 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "gradients/bert/encoder/Reshape_13_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/Reshape_13_grad/Reshape" + op: "Reshape" + input: "gradients/bert/pooler/strided_slice_grad/StridedSliceGrad" + input: "gradients/bert/encoder/Reshape_13_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/Reshape_13_grad/Reshape" + input: "bert/encoder/layer_11/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/Reshape_13_grad/Reshape" + input: "bert/encoder/layer_11/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { 
+ key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/Reshape_13_grad/Reshape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/Reshape_13_grad/Reshape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + 
key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_11/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_11/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} 
+node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN" + op: "AddN" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" 
+ input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_11/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_11/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value 
{ + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: 
"dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_11/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 
+ } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } 
+} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_11/output/add" + input: "bert/encoder/layer_11/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + 
type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_11/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + 
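+# The mean_grad nodes here mirror the variance_grad pattern: reconstruct the reduced shape, tile the [4096, 1] gradient back to [4096, 768], and divide by 768.0, i.e. the gradient of the LayerNorm moments/mean reduction.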
} + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: 
DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Tile" + input: 
"gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_1" + op: "AddN" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/dropout/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_1" + input: "bert/encoder/layer_11/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/dropout/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_1" + input: "bert/encoder/layer_11/output/dropout/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Shape" + input: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_11/output/dropout/mul_1_grad/Mul" + input: "bert/encoder/layer_11/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Mul" + input: 
"gradients/bert/encoder/layer_11/output/dropout/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Sum" + input: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_11/output/dense/BiasAdd" + input: "gradients/bert/encoder/layer_11/output/dropout/mul_1_grad/Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Reshape" + input: "bert/encoder/layer_11/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_11/intermediate/dense/mul_3" + input: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + 
} + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_3_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_11/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_11/intermediate/dense/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_3_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_11/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_11/intermediate/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_3_grad/Mul_1" + input: "bert/encoder/layer_11/intermediate/dense/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: 
DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_11/intermediate/dense/mul_2/x" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_3_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/add_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/add_1_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/add_1_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_11/intermediate/dense/add_1_grad/Shape" + input: "gradients/bert/encoder/layer_11/intermediate/dense/add_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/add_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/Reshape_1" + input: "gradients/bert/encoder/layer_11/intermediate/dense/add_1_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/add_1_grad/Reshape" + op: "Reshape" + input: 
"gradients/bert/encoder/layer_11/intermediate/dense/add_1_grad/Sum" + input: "gradients/bert/encoder/layer_11/intermediate/dense/add_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/add_1_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_2_grad/Reshape_1" + input: "gradients/bert/encoder/layer_11/intermediate/dense/add_1_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/add_1_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/intermediate/dense/add_1_grad/Sum_1" + input: "gradients/bert/encoder/layer_11/intermediate/dense/add_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/Tanh_grad/TanhGrad" + op: "TanhGrad" + input: "bert/encoder/layer_11/intermediate/dense/Tanh" + input: "gradients/bert/encoder/layer_11/intermediate/dense/add_1_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Shape" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_11/intermediate/dense/Tanh_grad/TanhGrad" + input: "bert/encoder/layer_11/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + 
} + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Sum" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_11/intermediate/dense/mul_1/x" + input: "gradients/bert/encoder/layer_11/intermediate/dense/Tanh_grad/TanhGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Sum_1" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Shape" + input: 
"gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Reshape_1" + input: "bert/encoder/layer_11/intermediate/dense/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Mul" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Sum" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_11/intermediate/dense/mul/x" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: 
"\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Shape" + input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Reshape_1" + input: "bert/encoder/layer_11/intermediate/dense/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/sub/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/sub" + op: "Sub" + input: "bert/encoder/layer_11/intermediate/dense/Pow/y" + input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/sub/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Pow" + op: "Pow" + input: "bert/encoder/layer_11/intermediate/dense/BiasAdd" + input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/mul" + input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/mul_1" + input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Reshape" + op: "Reshape" + input: 
"gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Sum" + input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Greater/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Greater" + op: "Greater" + input: "bert/encoder/layer_11/intermediate/dense/BiasAdd" + input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Greater/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/ones_like/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/ones_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/ones_like" + op: "Fill" + input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/ones_like/Shape" + input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/ones_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Select" + op: "Select" + input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Greater" + input: "bert/encoder/layer_11/intermediate/dense/BiasAdd" + input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/ones_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Log" + op: "Log" + input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + 
size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/zeros_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/zeros_like" + op: "Fill" + input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor" + input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/zeros_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Select_1" + op: "Select" + input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Greater" + input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Log" + input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/zeros_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/mul_2" + op: "Mul" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Reshape_1" + input: "bert/encoder/layer_11/intermediate/dense/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/mul_3" + op: "Mul" + input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/mul_2" + input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Select_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/mul_3" + input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Sum_1" + input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/AddN_2" + 
op: "AddN" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_3_grad/Mul" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Reshape_1" + input: "gradients/bert/encoder/layer_11/intermediate/dense/Pow_grad/Reshape" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/intermediate/dense/mul_3_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/AddN_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/AddN_2" + input: "bert/encoder/layer_11/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_1" + input: "gradients/AddN_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_3" + op: "AddN" + input: "gradients/AddN_1" + input: "gradients/bert/encoder/layer_11/intermediate/dense/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_3" + input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_3" + input: "bert/encoder/layer_11/attention/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + 
type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_3" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_3" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + 
type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_11/attention/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: 
DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_4" + op: "AddN" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_4" + input: 
"bert/encoder/layer_11/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_4" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" 
+ value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_11/attention/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: 
"loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } 
+ } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_11/attention/output/add" + input: "bert/encoder/layer_11/attention/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { 
+ name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: 
"loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_11/attention/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Size" + input: 
"gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + 
list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_5" + op: "AddN" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_5" + input: 
"bert/encoder/layer_11/attention/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_5" + input: "bert/encoder/layer_11/attention/output/dropout/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Shape" + input: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_1_grad/Mul" + input: "bert/encoder/layer_11/attention/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Sum" + input: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Mul_1" + op: "Mul" 
+ input: "bert/encoder/layer_11/attention/output/dense/BiasAdd" + input: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_1_grad/Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Reshape" + input: "bert/encoder/layer_11/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_11/attention/self/Reshape_3" + input: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Reshape_3_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Reshape_3_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/dense/MatMul_grad/MatMul" + input: 
"gradients/bert/encoder/layer_11/attention/self/Reshape_3_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/transpose_3_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_11/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/transpose_3_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_11/attention/self/Reshape_3_grad/Reshape" + input: "gradients/bert/encoder/layer_11/attention/self/transpose_3_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/layer_11/attention/self/transpose_3_grad/transpose" + input: "bert/encoder/layer_11/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/layer_11/attention/self/dropout/mul_1" + input: "gradients/bert/encoder/layer_11/attention/self/transpose_3_grad/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/strided_slice/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { 
+ dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/strided_slice/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/strided_slice/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/strided_slice" + op: "StridedSlice" + input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/Shape" + input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/strided_slice/stack" + input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/strided_slice/stack_1" + input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/strided_slice/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 1 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/strided_slice_1/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/strided_slice_1/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/strided_slice_1/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/strided_slice_1" + op: "StridedSlice" + input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/Shape_1" + input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/strided_slice_1/stack" + input: 
"gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/strided_slice_1/stack_1" + input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/strided_slice_1/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 1 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/strided_slice" + input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/strided_slice_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/MatMul" + input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/Sum" + input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/MatMul_1" + input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/Sum_1" + input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_11/attention/self/dropout/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/Reshape" + input: "bert/encoder/layer_11/attention/self/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/Reshape" + input: "bert/encoder/layer_11/attention/self/dropout/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/transpose_2_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_11/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/transpose_2_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/Reshape_1" + input: "gradients/bert/encoder/layer_11/attention/self/transpose_2_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Shape" + input: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_1_grad/Mul" + input: "bert/encoder/layer_11/attention/self/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } 
+ dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Sum" + input: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_11/attention/self/Softmax" + input: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_1_grad/Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/layer_11/attention/self/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr 
{ + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Softmax_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Reshape" + input: "bert/encoder/layer_11/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Softmax_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Softmax_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_11/attention/self/Softmax_grad/mul" + input: "gradients/bert/encoder/layer_11/attention/self/Softmax_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Softmax_grad/sub" + op: "Sub" + input: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Reshape" + input: "gradients/bert/encoder/layer_11/attention/self/Softmax_grad/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Softmax_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/self/Softmax_grad/sub" + input: "bert/encoder/layer_11/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/value/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_11/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } 
+ } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\001\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_11/attention/self/add_grad/Shape" + input: "gradients/bert/encoder/layer_11/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_11/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_11/attention/self/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/self/add_grad/Sum" + input: "gradients/bert/encoder/layer_11/attention/self/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_11/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_11/attention/self/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/self/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_11/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/value/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_11/attention/self/Reshape_2_grad/Reshape" + input: "bert/encoder/layer_11/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { 
+ name: "gradients/bert/encoder/layer_11/attention/self/value/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_11/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Shape" + input: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/self/add_grad/Reshape" + input: "bert/encoder/layer_11/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Mul" + input: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Sum" + input: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_11/attention/self/MatMul" + 
input: "gradients/bert/encoder/layer_11/attention/self/add_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/MatMul_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_11/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/MatMul_grad/MatMul_1" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_11/attention/self/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_11/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_11/attention/self/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_11/attention/self/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_11/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + 
} + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_11/attention/self/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/layer_11/attention/self/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/self/transpose_grad/transpose" + input: "gradients/bert/encoder/layer_11/attention/self/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/layer_11/attention/self/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/query/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_11/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/key/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_11/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/query/MatMul_grad/MatMul" + op: "MatMul" + input: 
"gradients/bert/encoder/layer_11/attention/self/Reshape_grad/Reshape" + input: "bert/encoder/layer_11/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/query/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_11/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/key/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_11/attention/self/Reshape_1_grad/Reshape" + input: "bert/encoder/layer_11/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/key/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_11/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_6" + op: "AddN" + input: "gradients/AddN_5" + input: "gradients/bert/encoder/layer_11/attention/self/value/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_11/attention/self/query/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_11/attention/self/key/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 4 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_6" + input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_6" + input: "bert/encoder/layer_10/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + 
name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_6" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_6" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + 
value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_10/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + 
} + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_7" + op: "AddN" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_7" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_7" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + 
value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { 
+ key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_10/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape" + } + 
} + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } 
+ attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } 
+ shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_10/output/add" + input: "bert/encoder/layer_10/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: 
DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_10/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value 
{ + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 
2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: 
"gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_8" + op: "AddN" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/dropout/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_8" + input: "bert/encoder/layer_10/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/dropout/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_8" + input: "bert/encoder/layer_10/output/dropout/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Shape" + input: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_10/output/dropout/mul_1_grad/Mul" + input: "bert/encoder/layer_10/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Sum" + op: "Sum" + input: 
"gradients/bert/encoder/layer_10/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Sum" + input: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_10/output/dense/BiasAdd" + input: "gradients/bert/encoder/layer_10/output/dropout/mul_1_grad/Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Reshape" + input: "bert/encoder/layer_10/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_10/intermediate/dense/mul_3" + input: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + 
value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_3_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_10/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_10/intermediate/dense/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_3_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_10/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_10/intermediate/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_3_grad/Mul_1" + input: "bert/encoder/layer_10/intermediate/dense/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/Shape" + attr { + key: "T" + value 
{ + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_10/intermediate/dense/mul_2/x" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_3_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/add_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/add_1_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/add_1_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_10/intermediate/dense/add_1_grad/Shape" + input: "gradients/bert/encoder/layer_10/intermediate/dense/add_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/add_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/Reshape_1" + input: "gradients/bert/encoder/layer_10/intermediate/dense/add_1_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: 
"gradients/bert/encoder/layer_10/intermediate/dense/add_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/intermediate/dense/add_1_grad/Sum" + input: "gradients/bert/encoder/layer_10/intermediate/dense/add_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/add_1_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_2_grad/Reshape_1" + input: "gradients/bert/encoder/layer_10/intermediate/dense/add_1_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/add_1_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/intermediate/dense/add_1_grad/Sum_1" + input: "gradients/bert/encoder/layer_10/intermediate/dense/add_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/Tanh_grad/TanhGrad" + op: "TanhGrad" + input: "bert/encoder/layer_10/intermediate/dense/Tanh" + input: "gradients/bert/encoder/layer_10/intermediate/dense/add_1_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Shape" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_10/intermediate/dense/Tanh_grad/TanhGrad" + input: "bert/encoder/layer_10/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Sum" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_10/intermediate/dense/mul_1/x" + input: "gradients/bert/encoder/layer_10/intermediate/dense/Tanh_grad/TanhGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Sum_1" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: 
"gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Shape" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Reshape_1" + input: "bert/encoder/layer_10/intermediate/dense/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Mul" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Sum" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_10/intermediate/dense/mul/x" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + 
tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Shape" + input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Reshape_1" + input: "bert/encoder/layer_10/intermediate/dense/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/sub/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/sub" + op: "Sub" + input: "bert/encoder/layer_10/intermediate/dense/Pow/y" + input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/sub/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Pow" + op: "Pow" + input: "bert/encoder/layer_10/intermediate/dense/BiasAdd" + input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/mul" + input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/mul_1" + input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: 
"gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Sum" + input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Greater/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Greater" + op: "Greater" + input: "bert/encoder/layer_10/intermediate/dense/BiasAdd" + input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Greater/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/ones_like/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/ones_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/ones_like" + op: "Fill" + input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/ones_like/Shape" + input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/ones_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Select" + op: "Select" + input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Greater" + input: "bert/encoder/layer_10/intermediate/dense/BiasAdd" + input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/ones_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Log" + op: "Log" + input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/zeros_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/zeros_like" + op: "Fill" + input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor" + input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/zeros_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Select_1" + op: "Select" + input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Greater" + input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Log" + input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/zeros_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/mul_2" + op: "Mul" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Reshape_1" + input: "bert/encoder/layer_10/intermediate/dense/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/mul_3" + op: "Mul" + input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/mul_2" + input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Select_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/mul_3" + input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Sum_1" + input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + 
attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/AddN_9" + op: "AddN" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_3_grad/Mul" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Reshape_1" + input: "gradients/bert/encoder/layer_10/intermediate/dense/Pow_grad/Reshape" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/intermediate/dense/mul_3_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/AddN_9" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/AddN_9" + input: "bert/encoder/layer_10/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_1" + input: "gradients/AddN_9" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_10" + op: "AddN" + input: "gradients/AddN_8" + input: "gradients/bert/encoder/layer_10/intermediate/dense/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_10" + input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_10" + input: "bert/encoder/layer_10/attention/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_10" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_10" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: 
"gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_10/attention/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_11" + op: "AddN" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_11" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_11" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } 
+ attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" 
+ value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_10/attention/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: 
"loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + 
value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_10/attention/output/add" + input: "bert/encoder/layer_10/attention/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Size" + 
op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_10/attention/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Size" + input: 
"gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + 
list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_12" + op: "AddN" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_12" + input: 
"bert/encoder/layer_10/attention/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_12" + input: "bert/encoder/layer_10/attention/output/dropout/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Shape" + input: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_1_grad/Mul" + input: "bert/encoder/layer_10/attention/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Sum" + input: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Mul_1" + op: "Mul" 
+ input: "bert/encoder/layer_10/attention/output/dense/BiasAdd" + input: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_1_grad/Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Reshape" + input: "bert/encoder/layer_10/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_10/attention/self/Reshape_3" + input: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Reshape_3_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Reshape_3_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/output/dense/MatMul_grad/MatMul" + input: 
"gradients/bert/encoder/layer_10/attention/self/Reshape_3_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/transpose_3_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_10/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/transpose_3_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_10/attention/self/Reshape_3_grad/Reshape" + input: "gradients/bert/encoder/layer_10/attention/self/transpose_3_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/layer_10/attention/self/transpose_3_grad/transpose" + input: "bert/encoder/layer_10/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/layer_10/attention/self/dropout/mul_1" + input: "gradients/bert/encoder/layer_10/attention/self/transpose_3_grad/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/strided_slice/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { 
+ dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/strided_slice/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/strided_slice/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/strided_slice" + op: "StridedSlice" + input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/Shape" + input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/strided_slice/stack" + input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/strided_slice/stack_1" + input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/strided_slice/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 1 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/strided_slice_1/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/strided_slice_1/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/strided_slice_1/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/strided_slice_1" + op: "StridedSlice" + input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/Shape_1" + input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/strided_slice_1/stack" + input: 
"gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/strided_slice_1/stack_1" + input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/strided_slice_1/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 1 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/strided_slice" + input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/strided_slice_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/MatMul" + input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/Sum" + input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/MatMul_1" + input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/Sum_1" + input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_10/attention/self/dropout/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/Reshape" + input: "bert/encoder/layer_10/attention/self/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/Reshape" + input: "bert/encoder/layer_10/attention/self/dropout/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/transpose_2_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_10/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/transpose_2_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/Reshape_1" + input: "gradients/bert/encoder/layer_10/attention/self/transpose_2_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Shape" + input: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_1_grad/Mul" + input: "bert/encoder/layer_10/attention/self/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } 
+ dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Sum" + input: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_10/attention/self/Softmax" + input: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_1_grad/Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/layer_10/attention/self/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr 
{ + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Softmax_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Reshape" + input: "bert/encoder/layer_10/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Softmax_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Softmax_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_10/attention/self/Softmax_grad/mul" + input: "gradients/bert/encoder/layer_10/attention/self/Softmax_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Softmax_grad/sub" + op: "Sub" + input: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Reshape" + input: "gradients/bert/encoder/layer_10/attention/self/Softmax_grad/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Softmax_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/self/Softmax_grad/sub" + input: "bert/encoder/layer_10/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/value/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_10/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } 
+ } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\001\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_10/attention/self/add_grad/Shape" + input: "gradients/bert/encoder/layer_10/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_10/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_10/attention/self/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/self/add_grad/Sum" + input: "gradients/bert/encoder/layer_10/attention/self/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_10/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_10/attention/self/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/self/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_10/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/value/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_10/attention/self/Reshape_2_grad/Reshape" + input: "bert/encoder/layer_10/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { 
+ name: "gradients/bert/encoder/layer_10/attention/self/value/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_10/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Shape" + input: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/self/add_grad/Reshape" + input: "bert/encoder/layer_10/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Mul" + input: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Sum" + input: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_10/attention/self/MatMul" + 
input: "gradients/bert/encoder/layer_10/attention/self/add_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/MatMul_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_10/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/MatMul_grad/MatMul_1" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_10/attention/self/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_10/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_10/attention/self/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_10/attention/self/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_10/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + 
} + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_10/attention/self/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/layer_10/attention/self/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/self/transpose_grad/transpose" + input: "gradients/bert/encoder/layer_10/attention/self/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/layer_10/attention/self/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/query/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_10/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/key/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_10/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/query/MatMul_grad/MatMul" + op: "MatMul" + input: 
"gradients/bert/encoder/layer_10/attention/self/Reshape_grad/Reshape" + input: "bert/encoder/layer_10/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/query/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_10/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/key/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_10/attention/self/Reshape_1_grad/Reshape" + input: "bert/encoder/layer_10/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/key/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_10/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_13" + op: "AddN" + input: "gradients/AddN_12" + input: "gradients/bert/encoder/layer_10/attention/self/value/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_10/attention/self/query/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_10/attention/self/key/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 4 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_13" + input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_13" + input: "bert/encoder/layer_9/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + 
name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_13" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_13" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: 
DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_9/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } 
+ } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_14" + op: "AddN" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_14" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" 
+ input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_14" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: 
DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: 
DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_9/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + 
attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: 
"Const" + input: "^gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_9/output/add" + input: "bert/encoder/layer_9/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + 
key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_9/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + 
size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + 
size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_15" + op: "AddN" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/dropout/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_15" + input: "bert/encoder/layer_9/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/dropout/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_15" + input: "bert/encoder/layer_9/output/dropout/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Shape" + input: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_9/output/dropout/mul_1_grad/Mul" + input: "bert/encoder/layer_9/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + 
} + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Sum" + input: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_9/output/dense/BiasAdd" + input: "gradients/bert/encoder/layer_9/output/dropout/mul_1_grad/Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Reshape" + input: "bert/encoder/layer_9/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_9/intermediate/dense/mul_3" + input: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_3_grad/Mul" + op: "Mul" + input: 
"gradients/bert/encoder/layer_9/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_9/intermediate/dense/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_3_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_9/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_9/intermediate/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_3_grad/Mul_1" + input: "bert/encoder/layer_9/intermediate/dense/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_9/intermediate/dense/mul_2/x" + input: 
"gradients/bert/encoder/layer_9/intermediate/dense/mul_3_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/add_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/add_1_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/add_1_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_9/intermediate/dense/add_1_grad/Shape" + input: "gradients/bert/encoder/layer_9/intermediate/dense/add_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/add_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/Reshape_1" + input: "gradients/bert/encoder/layer_9/intermediate/dense/add_1_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/add_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/intermediate/dense/add_1_grad/Sum" + input: "gradients/bert/encoder/layer_9/intermediate/dense/add_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + 
list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/add_1_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_2_grad/Reshape_1" + input: "gradients/bert/encoder/layer_9/intermediate/dense/add_1_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/add_1_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/intermediate/dense/add_1_grad/Sum_1" + input: "gradients/bert/encoder/layer_9/intermediate/dense/add_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/Tanh_grad/TanhGrad" + op: "TanhGrad" + input: "bert/encoder/layer_9/intermediate/dense/Tanh" + input: "gradients/bert/encoder/layer_9/intermediate/dense/add_1_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Shape" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_9/intermediate/dense/Tanh_grad/TanhGrad" + input: "bert/encoder/layer_9/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT 
+ } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Sum" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_9/intermediate/dense/mul_1/x" + input: "gradients/bert/encoder/layer_9/intermediate/dense/Tanh_grad/TanhGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Sum_1" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Shape" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Mul" + op: "Mul" + input: 
"gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Reshape_1" + input: "bert/encoder/layer_9/intermediate/dense/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Mul" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Sum" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_9/intermediate/dense/mul/x" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node 
{ + name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Shape" + input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Reshape_1" + input: "bert/encoder/layer_9/intermediate/dense/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/sub/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/sub" + op: "Sub" + input: "bert/encoder/layer_9/intermediate/dense/Pow/y" + input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/sub/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Pow" + op: "Pow" + input: "bert/encoder/layer_9/intermediate/dense/BiasAdd" + input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/mul" + input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/mul_1" + input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Sum" + input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Greater/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Greater" + op: "Greater" + input: "bert/encoder/layer_9/intermediate/dense/BiasAdd" + input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Greater/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/ones_like/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/ones_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/ones_like" + op: "Fill" + input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/ones_like/Shape" + input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/ones_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Select" + op: "Select" + input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Greater" + input: "bert/encoder/layer_9/intermediate/dense/BiasAdd" + input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/ones_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Log" + op: "Log" + input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/zeros_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { 
+ shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/zeros_like" + op: "Fill" + input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor" + input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/zeros_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Select_1" + op: "Select" + input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Greater" + input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Log" + input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/zeros_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/mul_2" + op: "Mul" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Reshape_1" + input: "bert/encoder/layer_9/intermediate/dense/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/mul_3" + op: "Mul" + input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/mul_2" + input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Select_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/mul_3" + input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Sum_1" + input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/AddN_16" + op: "AddN" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_3_grad/Mul" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Reshape_1" + input: "gradients/bert/encoder/layer_9/intermediate/dense/Pow_grad/Reshape" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@gradients/bert/encoder/layer_9/intermediate/dense/mul_3_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/AddN_16" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/AddN_16" + input: "bert/encoder/layer_9/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_1" + input: "gradients/AddN_16" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_17" + op: "AddN" + input: "gradients/AddN_15" + input: "gradients/bert/encoder/layer_9/intermediate/dense/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_17" + input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_17" + input: "bert/encoder/layer_9/attention/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value 
{ + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_17" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_17" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } 
+ } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_9/attention/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: 
"gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_18" + op: "AddN" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_18" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: 
"gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_18" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: 
"\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_9/attention/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/range" + op: "Range" + 
input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: 
"gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_9/attention/output/add" + input: "bert/encoder/layer_9/attention/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_9/attention/output/LayerNorm/moments/mean/reduction_indices" + 
input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: 
"loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_19" + op: "AddN" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_19" + input: "bert/encoder/layer_9/attention/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_19" + input: "bert/encoder/layer_9/attention/output/dropout/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Shape" + input: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_1_grad/Mul" + input: "bert/encoder/layer_9/attention/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Sum" + input: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_9/attention/output/dense/BiasAdd" + input: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_1_grad/Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + 
type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Reshape" + input: "bert/encoder/layer_9/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_9/attention/self/Reshape_3" + input: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Reshape_3_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Reshape_3_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/dense/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_9/attention/self/Reshape_3_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/transpose_3_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_9/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 
4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/transpose_3_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_9/attention/self/Reshape_3_grad/Reshape" + input: "gradients/bert/encoder/layer_9/attention/self/transpose_3_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/layer_9/attention/self/transpose_3_grad/transpose" + input: "bert/encoder/layer_9/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/layer_9/attention/self/dropout/mul_1" + input: "gradients/bert/encoder/layer_9/attention/self/transpose_3_grad/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/strided_slice/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/strided_slice/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -2 + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/strided_slice/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/strided_slice" + op: "StridedSlice" + input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/Shape" + input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/strided_slice/stack" + input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/strided_slice/stack_1" + input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/strided_slice/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 1 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/strided_slice_1/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/strided_slice_1/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/strided_slice_1/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/strided_slice_1" + op: "StridedSlice" + input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/Shape_1" + input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/strided_slice_1/stack" + input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/strided_slice_1/stack_1" + input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/strided_slice_1/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 1 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} 
+node { + name: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/strided_slice" + input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/strided_slice_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/MatMul" + input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/Sum" + input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/MatMul_1" + input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/Sum_1" + input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/Reshape" + input: "bert/encoder/layer_9/attention/self/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/Reshape" + input: "bert/encoder/layer_9/attention/self/dropout/mul" + attr { + key: "T" + value { + 
type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/transpose_2_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_9/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/transpose_2_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/Reshape_1" + input: "gradients/bert/encoder/layer_9/attention/self/transpose_2_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Shape" + input: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_1_grad/Mul" + input: "bert/encoder/layer_9/attention/self/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: 
"gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Sum" + input: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_9/attention/self/Softmax" + input: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_1_grad/Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/layer_9/attention/self/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Softmax_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Reshape" + input: "bert/encoder/layer_9/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Softmax_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + 
list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Softmax_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_9/attention/self/Softmax_grad/mul" + input: "gradients/bert/encoder/layer_9/attention/self/Softmax_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Softmax_grad/sub" + op: "Sub" + input: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Reshape" + input: "gradients/bert/encoder/layer_9/attention/self/Softmax_grad/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Softmax_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_9/attention/self/Softmax_grad/sub" + input: "bert/encoder/layer_9/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/value/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_9/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\001\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_9/attention/self/add_grad/Shape" + input: "gradients/bert/encoder/layer_9/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_9/attention/self/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_9/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_9/attention/self/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/self/add_grad/Sum" + input: "gradients/bert/encoder/layer_9/attention/self/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_9/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_9/attention/self/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/self/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_9/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/value/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_9/attention/self/Reshape_2_grad/Reshape" + input: "bert/encoder/layer_9/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/value/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_9/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { 
+ type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Shape" + input: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_9/attention/self/add_grad/Reshape" + input: "bert/encoder/layer_9/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Mul" + input: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Sum" + input: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_9/attention/self/MatMul" + input: "gradients/bert/encoder/layer_9/attention/self/add_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: 
false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/MatMul_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_9/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/MatMul_grad/MatMul_1" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_9/attention/self/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_9/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_9/attention/self/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_9/attention/self/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_9/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_9/attention/self/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/layer_9/attention/self/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + 
value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/self/transpose_grad/transpose" + input: "gradients/bert/encoder/layer_9/attention/self/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/layer_9/attention/self/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/query/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_9/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/key/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_9/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/query/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_9/attention/self/Reshape_grad/Reshape" + input: "bert/encoder/layer_9/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/query/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_9/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + 
attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/key/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_9/attention/self/Reshape_1_grad/Reshape" + input: "bert/encoder/layer_9/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/key/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_9/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_20" + op: "AddN" + input: "gradients/AddN_19" + input: "gradients/bert/encoder/layer_9/attention/self/value/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_9/attention/self/query/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_9/attention/self/key/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 4 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_20" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_20" + input: "bert/encoder/layer_8/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_20" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_20" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + 
} + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_8/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: 
"gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_21" + op: "AddN" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_21" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" 
+ value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_21" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: 
"gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_8/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape_1" + input: 
"gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/truediv" + 
attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_8/output/add" + input: "bert/encoder/layer_8/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_8/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 
1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_22" + op: "AddN" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
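+# Note: the mean_grad nodes above implement the standard reduce_mean gradient over the last axis: reshape the upstream gradient to [4096, 1], tile it back to [4096, 768], and divide by 768.0; the surrounding gradients/AddN_22 node then sums the three gradient paths that meet at bert/encoder/layer_8/output/add.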
"loc:@gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/dropout/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_22" + input: "bert/encoder/layer_8/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/dropout/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_22" + input: "bert/encoder/layer_8/output/dropout/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Shape" + input: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_8/output/dropout/mul_1_grad/Mul" + input: "bert/encoder/layer_8/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Sum" + input: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim 
{ + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_8/output/dense/BiasAdd" + input: "gradients/bert/encoder/layer_8/output/dropout/mul_1_grad/Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Reshape" + input: "bert/encoder/layer_8/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_8/intermediate/dense/mul_3" + input: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_3_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_8/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_8/intermediate/dense/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_3_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_8/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_8/intermediate/dense/BiasAdd" + attr { + key: "T" + value { + type: 
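+# Note: above, the dropout mul_grad/mul_1_grad nodes scale the incoming gradient by the saved dropout mask and keep-prob factor, BiasAddGrad reduces it to the [768] bias gradient, and the two MatMul_grad nodes give d_input = dY * W^T ([4096, 3072]) and d_kernel = X^T * dY ([3072, 768]) for layer_8's output dense layer.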
DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_3_grad/Mul_1" + input: "bert/encoder/layer_8/intermediate/dense/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_8/intermediate/dense/mul_2/x" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_3_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + 
} + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/add_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/add_1_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/add_1_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_8/intermediate/dense/add_1_grad/Shape" + input: "gradients/bert/encoder/layer_8/intermediate/dense/add_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/add_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/Reshape_1" + input: "gradients/bert/encoder/layer_8/intermediate/dense/add_1_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/add_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/intermediate/dense/add_1_grad/Sum" + input: "gradients/bert/encoder/layer_8/intermediate/dense/add_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/add_1_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_2_grad/Reshape_1" + input: "gradients/bert/encoder/layer_8/intermediate/dense/add_1_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" 
+ value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/add_1_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/intermediate/dense/add_1_grad/Sum_1" + input: "gradients/bert/encoder/layer_8/intermediate/dense/add_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/Tanh_grad/TanhGrad" + op: "TanhGrad" + input: "bert/encoder/layer_8/intermediate/dense/Tanh" + input: "gradients/bert/encoder/layer_8/intermediate/dense/add_1_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Shape" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_8/intermediate/dense/Tanh_grad/TanhGrad" + input: "bert/encoder/layer_8/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Sum" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + 
attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_8/intermediate/dense/mul_1/x" + input: "gradients/bert/encoder/layer_8/intermediate/dense/Tanh_grad/TanhGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Sum_1" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Shape" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Reshape_1" + input: "bert/encoder/layer_8/intermediate/dense/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Mul" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { 
+ type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Sum" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_8/intermediate/dense/mul/x" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Shape" + input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/mul" + op: "Mul" + input: 
"gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Reshape_1" + input: "bert/encoder/layer_8/intermediate/dense/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/sub/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/sub" + op: "Sub" + input: "bert/encoder/layer_8/intermediate/dense/Pow/y" + input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/sub/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Pow" + op: "Pow" + input: "bert/encoder/layer_8/intermediate/dense/BiasAdd" + input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/mul" + input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/mul_1" + input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Sum" + input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Greater/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Greater" + op: "Greater" + input: "bert/encoder/layer_8/intermediate/dense/BiasAdd" + input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Greater/y" + attr { + key: "T" + value { + 
type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/ones_like/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/ones_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/ones_like" + op: "Fill" + input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/ones_like/Shape" + input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/ones_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Select" + op: "Select" + input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Greater" + input: "bert/encoder/layer_8/intermediate/dense/BiasAdd" + input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/ones_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Log" + op: "Log" + input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/zeros_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/zeros_like" + op: "Fill" + input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor" + input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/zeros_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim 
{ + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Select_1" + op: "Select" + input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Greater" + input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Log" + input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/zeros_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/mul_2" + op: "Mul" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Reshape_1" + input: "bert/encoder/layer_8/intermediate/dense/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/mul_3" + op: "Mul" + input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/mul_2" + input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Select_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/mul_3" + input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Sum_1" + input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/AddN_23" + op: "AddN" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_3_grad/Mul" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Reshape_1" + input: "gradients/bert/encoder/layer_8/intermediate/dense/Pow_grad/Reshape" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/intermediate/dense/mul_3_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/AddN_23" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: 
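+# Note: the Pow_grad nodes above follow TensorFlow's generic pow gradient: d/dx = dY * y * x^(y-1) and d/dy = dY * x^y * log(x), with the Greater/Select pair zeroing the log branch for non-positive inputs; gradients/AddN_23 then merges the three branches into the gradient of the intermediate BiasAdd.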
"gradients/bert/encoder/layer_8/intermediate/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/AddN_23" + input: "bert/encoder/layer_8/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_1" + input: "gradients/AddN_23" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_24" + op: "AddN" + input: "gradients/AddN_22" + input: "gradients/bert/encoder/layer_8/intermediate/dense/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_24" + input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_24" + input: "bert/encoder/layer_8/attention/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + 
attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_24" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_24" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: 
"\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_8/attention/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: 
"gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_25" + op: "AddN" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_25" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + 
input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_25" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } 
+ } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_8/attention/output/LayerNorm/moments/variance/reduction_indices" + 
input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + 
s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + 
} + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_8/attention/output/add" + input: "bert/encoder/layer_8/attention/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + 
op: "Sum" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_8/attention/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + 
key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Reshape" + input: 
"gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_26" + op: "AddN" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_26" + input: "bert/encoder/layer_8/attention/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_26" + input: "bert/encoder/layer_8/attention/output/dropout/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Shape" + input: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_1_grad/Mul" + input: "bert/encoder/layer_8/attention/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Sum" + input: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_8/attention/output/dense/BiasAdd" + input: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_1_grad/Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/dense/BiasAdd_grad/BiasAddGrad" + op: 
"BiasAddGrad" + input: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Reshape" + input: "bert/encoder/layer_8/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_8/attention/self/Reshape_3" + input: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Reshape_3_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Reshape_3_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/output/dense/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_8/attention/self/Reshape_3_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/transpose_3_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_8/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/transpose_3_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_8/attention/self/Reshape_3_grad/Reshape" + input: "gradients/bert/encoder/layer_8/attention/self/transpose_3_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/layer_8/attention/self/transpose_3_grad/transpose" + input: 
"bert/encoder/layer_8/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/layer_8/attention/self/dropout/mul_1" + input: "gradients/bert/encoder/layer_8/attention/self/transpose_3_grad/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/strided_slice/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/strided_slice/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/strided_slice/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/strided_slice" + op: "StridedSlice" + input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/Shape" + input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/strided_slice/stack" + input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/strided_slice/stack_1" + input: 
"gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/strided_slice/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 1 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/strided_slice_1/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/strided_slice_1/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/strided_slice_1/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/strided_slice_1" + op: "StridedSlice" + input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/Shape_1" + input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/strided_slice_1/stack" + input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/strided_slice_1/stack_1" + input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/strided_slice_1/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 1 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/strided_slice" + input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/strided_slice_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/MatMul" + input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/BroadcastGradientArgs" + attr { + 
key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/Sum" + input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/MatMul_1" + input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/Sum_1" + input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/Reshape" + input: "bert/encoder/layer_8/attention/self/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/Reshape" + input: "bert/encoder/layer_8/attention/self/dropout/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/transpose_2_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_8/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/transpose_2_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/Reshape_1" + input: 
"gradients/bert/encoder/layer_8/attention/self/transpose_2_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Shape" + input: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_1_grad/Mul" + input: "bert/encoder/layer_8/attention/self/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Sum" + input: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_8/attention/self/Softmax" + input: 
"gradients/bert/encoder/layer_8/attention/self/dropout/mul_1_grad/Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/layer_8/attention/self/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Softmax_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Reshape" + input: "bert/encoder/layer_8/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Softmax_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Softmax_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_8/attention/self/Softmax_grad/mul" + input: "gradients/bert/encoder/layer_8/attention/self/Softmax_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 
128 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Softmax_grad/sub" + op: "Sub" + input: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Reshape" + input: "gradients/bert/encoder/layer_8/attention/self/Softmax_grad/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Softmax_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_8/attention/self/Softmax_grad/sub" + input: "bert/encoder/layer_8/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/value/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_8/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\001\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_8/attention/self/add_grad/Shape" + input: "gradients/bert/encoder/layer_8/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_8/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_8/attention/self/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/add_grad/Reshape" + op: "Reshape" + input: 
"gradients/bert/encoder/layer_8/attention/self/add_grad/Sum" + input: "gradients/bert/encoder/layer_8/attention/self/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_8/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_8/attention/self/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/self/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_8/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/value/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_8/attention/self/Reshape_2_grad/Reshape" + input: "bert/encoder/layer_8/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/value/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_8/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/BroadcastGradientArgs" + op: 
"BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Shape" + input: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_8/attention/self/add_grad/Reshape" + input: "bert/encoder/layer_8/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Mul" + input: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Sum" + input: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_8/attention/self/MatMul" + input: "gradients/bert/encoder/layer_8/attention/self/add_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/MatMul_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Reshape" + input: 
"bert/encoder/layer_8/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/MatMul_grad/MatMul_1" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_8/attention/self/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_8/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_8/attention/self/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_8/attention/self/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_8/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_8/attention/self/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/layer_8/attention/self/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/self/transpose_grad/transpose" + input: "gradients/bert/encoder/layer_8/attention/self/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { 
+ type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/layer_8/attention/self/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/query/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_8/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/key/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_8/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/query/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_8/attention/self/Reshape_grad/Reshape" + input: "bert/encoder/layer_8/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/query/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_8/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/key/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_8/attention/self/Reshape_1_grad/Reshape" + input: "bert/encoder/layer_8/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: 
true + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/key/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_8/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_27" + op: "AddN" + input: "gradients/AddN_26" + input: "gradients/bert/encoder/layer_8/attention/self/value/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_8/attention/self/query/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_8/attention/self/key/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 4 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_27" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_27" + input: "bert/encoder/layer_7/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_27" + input: 
"gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_27" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_7/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_28" + op: "AddN" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { 
+ key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_28" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_28" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: 
"gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/Shape" + 
attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_7/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: 
"loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/mod" + input: 
"gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value 
{ + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_7/output/add" + input: "bert/encoder/layer_7/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: 
"Mul" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} 
+node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_7/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + 
type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Maximum" + 
attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_29" + op: "AddN" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/dropout/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_29" + input: "bert/encoder/layer_7/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/dropout/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_29" + input: "bert/encoder/layer_7/output/dropout/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + 
} + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Shape" + input: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_7/output/dropout/mul_1_grad/Mul" + input: "bert/encoder/layer_7/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Sum" + input: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_7/output/dense/BiasAdd" + input: "gradients/bert/encoder/layer_7/output/dropout/mul_1_grad/Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: 
"keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Reshape" + input: "bert/encoder/layer_7/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_7/intermediate/dense/mul_3" + input: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_3_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_7/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_7/intermediate/dense/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_3_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_7/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_7/intermediate/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + 
} + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_3_grad/Mul_1" + input: "bert/encoder/layer_7/intermediate/dense/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_7/intermediate/dense/mul_2/x" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_3_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/add_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + 
} + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/add_1_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/add_1_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_7/intermediate/dense/add_1_grad/Shape" + input: "gradients/bert/encoder/layer_7/intermediate/dense/add_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/add_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/Reshape_1" + input: "gradients/bert/encoder/layer_7/intermediate/dense/add_1_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/add_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/intermediate/dense/add_1_grad/Sum" + input: "gradients/bert/encoder/layer_7/intermediate/dense/add_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/add_1_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_2_grad/Reshape_1" + input: "gradients/bert/encoder/layer_7/intermediate/dense/add_1_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/add_1_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/intermediate/dense/add_1_grad/Sum_1" + input: "gradients/bert/encoder/layer_7/intermediate/dense/add_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/Tanh_grad/TanhGrad" + op: "TanhGrad" + input: "bert/encoder/layer_7/intermediate/dense/Tanh" + input: "gradients/bert/encoder/layer_7/intermediate/dense/add_1_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + 
dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Shape" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_7/intermediate/dense/Tanh_grad/TanhGrad" + input: "bert/encoder/layer_7/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Sum" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_7/intermediate/dense/mul_1/x" + input: "gradients/bert/encoder/layer_7/intermediate/dense/Tanh_grad/TanhGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { 
+ list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Sum_1" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Shape" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Reshape_1" + input: "bert/encoder/layer_7/intermediate/dense/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Mul" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Sum" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_7/intermediate/dense/mul/x" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Reshape_1" + attr { + key: "T" + 
value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Shape" + input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Reshape_1" + input: "bert/encoder/layer_7/intermediate/dense/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/sub/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/sub" + op: "Sub" + input: "bert/encoder/layer_7/intermediate/dense/Pow/y" + input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/sub/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr 
{ + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Pow" + op: "Pow" + input: "bert/encoder/layer_7/intermediate/dense/BiasAdd" + input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/mul" + input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/mul_1" + input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Sum" + input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Greater/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Greater" + op: "Greater" + input: "bert/encoder/layer_7/intermediate/dense/BiasAdd" + input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Greater/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/ones_like/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/ones_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} 
+node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/ones_like" + op: "Fill" + input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/ones_like/Shape" + input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/ones_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Select" + op: "Select" + input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Greater" + input: "bert/encoder/layer_7/intermediate/dense/BiasAdd" + input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/ones_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Log" + op: "Log" + input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/zeros_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/zeros_like" + op: "Fill" + input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor" + input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/zeros_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Select_1" + op: "Select" + input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Greater" + input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Log" + input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/zeros_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/mul_2" + op: "Mul" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Reshape_1" + input: "bert/encoder/layer_7/intermediate/dense/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/mul_3" + op: "Mul" + input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/mul_2" + input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Select_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/mul_3" + input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Sum_1" + input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/AddN_30" + op: "AddN" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_3_grad/Mul" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Reshape_1" + input: "gradients/bert/encoder/layer_7/intermediate/dense/Pow_grad/Reshape" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/intermediate/dense/mul_3_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/AddN_30" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/AddN_30" + input: "bert/encoder/layer_7/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_1" + input: "gradients/AddN_30" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: 
"gradients/AddN_31" + op: "AddN" + input: "gradients/AddN_29" + input: "gradients/bert/encoder/layer_7/intermediate/dense/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_31" + input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_31" + input: "bert/encoder/layer_7/attention/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_31" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + 
type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_31" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + 
dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_7/attention/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_32" + op: "AddN" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_32" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_32" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: 
"gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/Sum" 
+ input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_7/attention/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape 
{ + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: 
"gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: 
"gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_7/attention/output/add" + input: "bert/encoder/layer_7/attention/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + 
attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_7/attention/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: 
"loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Tile" + input: 
"gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_33" + op: "AddN" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_33" + input: "bert/encoder/layer_7/attention/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_33" + input: "bert/encoder/layer_7/attention/output/dropout/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Shape" + input: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_1_grad/Mul" + input: "bert/encoder/layer_7/attention/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Sum" + input: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_7/attention/output/dense/BiasAdd" + input: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_1_grad/Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Reshape" + input: "bert/encoder/layer_7/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: 
"gradients/bert/encoder/layer_7/attention/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_7/attention/self/Reshape_3" + input: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Reshape_3_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Reshape_3_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/dense/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_7/attention/self/Reshape_3_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/transpose_3_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_7/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/transpose_3_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_7/attention/self/Reshape_3_grad/Reshape" + input: "gradients/bert/encoder/layer_7/attention/self/transpose_3_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/layer_7/attention/self/transpose_3_grad/transpose" + input: "bert/encoder/layer_7/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/layer_7/attention/self/dropout/mul_1" + input: "gradients/bert/encoder/layer_7/attention/self/transpose_3_grad/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + 
value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/strided_slice/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/strided_slice/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/strided_slice/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/strided_slice" + op: "StridedSlice" + input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/Shape" + input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/strided_slice/stack" + input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/strided_slice/stack_1" + input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/strided_slice/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 1 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/strided_slice_1/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 
1 + } + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/strided_slice_1/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/strided_slice_1/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/strided_slice_1" + op: "StridedSlice" + input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/Shape_1" + input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/strided_slice_1/stack" + input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/strided_slice_1/stack_1" + input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/strided_slice_1/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 1 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/strided_slice" + input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/strided_slice_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/MatMul" + input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/Sum" + input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/MatMul_1" + input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/Sum_1" + input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/Reshape" + input: "bert/encoder/layer_7/attention/self/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/Reshape" + input: "bert/encoder/layer_7/attention/self/dropout/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/transpose_2_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_7/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/transpose_2_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/Reshape_1" + input: "gradients/bert/encoder/layer_7/attention/self/transpose_2_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Shape_1" 
+ op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Shape" + input: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_1_grad/Mul" + input: "bert/encoder/layer_7/attention/self/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Sum" + input: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_7/attention/self/Softmax" + input: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_1_grad/Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Reshape_1" + op: "Reshape" + input: 
"gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/layer_7/attention/self/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Softmax_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Reshape" + input: "bert/encoder/layer_7/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Softmax_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Softmax_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_7/attention/self/Softmax_grad/mul" + input: "gradients/bert/encoder/layer_7/attention/self/Softmax_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Softmax_grad/sub" + op: "Sub" + input: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Reshape" + input: "gradients/bert/encoder/layer_7/attention/self/Softmax_grad/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Softmax_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/self/Softmax_grad/sub" + input: "bert/encoder/layer_7/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 
+ } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/value/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_7/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\001\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_7/attention/self/add_grad/Shape" + input: "gradients/bert/encoder/layer_7/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_7/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_7/attention/self/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/self/add_grad/Sum" + input: "gradients/bert/encoder/layer_7/attention/self/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_7/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_7/attention/self/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: 
"keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/self/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_7/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/value/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_7/attention/self/Reshape_2_grad/Reshape" + input: "bert/encoder/layer_7/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/value/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_7/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Shape" + input: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/self/add_grad/Reshape" + input: "bert/encoder/layer_7/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Sum" + op: "Sum" + input: 
"gradients/bert/encoder/layer_7/attention/self/Mul_grad/Mul" + input: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Sum" + input: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_7/attention/self/MatMul" + input: "gradients/bert/encoder/layer_7/attention/self/add_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/MatMul_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_7/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/MatMul_grad/MatMul_1" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_7/attention/self/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } 
+ } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_7/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_7/attention/self/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_7/attention/self/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_7/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_7/attention/self/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/layer_7/attention/self/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/self/transpose_grad/transpose" + input: "gradients/bert/encoder/layer_7/attention/self/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/layer_7/attention/self/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: 
DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/query/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_7/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/key/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_7/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/query/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_7/attention/self/Reshape_grad/Reshape" + input: "bert/encoder/layer_7/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/query/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_7/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/key/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_7/attention/self/Reshape_1_grad/Reshape" + input: "bert/encoder/layer_7/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/key/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_7/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_34" + op: "AddN" + input: "gradients/AddN_33" + input: "gradients/bert/encoder/layer_7/attention/self/value/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_7/attention/self/query/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_7/attention/self/key/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 4 + } + } + attr { + key: 
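+# The gradient nodes above (Mul_grad, MatMul_grad, transpose_grad, Reshape_grad, and the
+# query/key BiasAdd_grad and MatMul_grad pairs) are TensorFlow's auto-generated backward
+# pass for the scaled dot-product attention scores of layer_7. A minimal sketch of the
+# forward ops they differentiate, assuming the shapes recorded in this graph
+# (batch 32, 12 heads, seq 128, head size 64); the function and argument names are illustrative:
+#
+#   import math
+#   import tensorflow as tf
+#
+#   def attention_scores(x, q_kernel, q_bias, k_kernel, k_bias,
+#                        batch=32, heads=12, seq=128, head_size=64):
+#       # x: [batch*seq, hidden] = [4096, 768], the layer_6 output feeding query/key
+#       q = tf.matmul(x, q_kernel) + q_bias              # query/MatMul + query/BiasAdd
+#       k = tf.matmul(x, k_kernel) + k_bias              # key/MatMul + key/BiasAdd
+#       q = tf.transpose(tf.reshape(q, [batch, seq, heads, head_size]), [0, 2, 1, 3])  # Reshape + transpose
+#       k = tf.transpose(tf.reshape(k, [batch, seq, heads, head_size]), [0, 2, 1, 3])  # Reshape_1 + transpose_1
+#       s = tf.matmul(q, k, transpose_b=True)            # MatMul -> [32, 12, 128, 128]
+#       return s * (1.0 / math.sqrt(float(head_size)))   # Mul (scale); the mask add and softmax follow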
"T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_34" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_34" + input: "bert/encoder/layer_6/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_34" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_34" + input: 
"gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + 
value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_6/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_35" + op: "AddN" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { 
+ tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_35" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_35" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: 
"gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_6/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + 
key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + 
dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_6/output/add" + input: "bert/encoder/layer_6/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 
768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_6/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Fill" + op: 
"Fill" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_36" + op: "AddN" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/dropout/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_36" + input: "bert/encoder/layer_6/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/dropout/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_36" + input: "bert/encoder/layer_6/output/dropout/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_6/output/dropout/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Shape" + input: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_6/output/dropout/mul_1_grad/Mul" + input: "bert/encoder/layer_6/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Sum" + input: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_6/output/dense/BiasAdd" + input: "gradients/bert/encoder/layer_6/output/dropout/mul_1_grad/Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } 
+ } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Reshape" + input: "bert/encoder/layer_6/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_6/intermediate/dense/mul_3" + input: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_3_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_6/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_6/intermediate/dense/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_3_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_6/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_6/intermediate/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_3_grad/Mul_1" + input: "bert/encoder/layer_6/intermediate/dense/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_6/intermediate/dense/mul_2/x" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_3_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/add_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/add_1_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/add_1_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: 
"gradients/bert/encoder/layer_6/intermediate/dense/add_1_grad/Shape" + input: "gradients/bert/encoder/layer_6/intermediate/dense/add_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/add_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/Reshape_1" + input: "gradients/bert/encoder/layer_6/intermediate/dense/add_1_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/add_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/intermediate/dense/add_1_grad/Sum" + input: "gradients/bert/encoder/layer_6/intermediate/dense/add_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/add_1_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_2_grad/Reshape_1" + input: "gradients/bert/encoder/layer_6/intermediate/dense/add_1_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/add_1_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/intermediate/dense/add_1_grad/Sum_1" + input: "gradients/bert/encoder/layer_6/intermediate/dense/add_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/Tanh_grad/TanhGrad" + op: "TanhGrad" + input: "bert/encoder/layer_6/intermediate/dense/Tanh" + input: "gradients/bert/encoder/layer_6/intermediate/dense/add_1_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } 
+ } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Shape" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_6/intermediate/dense/Tanh_grad/TanhGrad" + input: "bert/encoder/layer_6/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Sum" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_6/intermediate/dense/mul_1/x" + input: "gradients/bert/encoder/layer_6/intermediate/dense/Tanh_grad/TanhGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Sum_1" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Shape" + op: "Const" + attr { + 
key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Shape" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Reshape_1" + input: "bert/encoder/layer_6/intermediate/dense/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Mul" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Sum" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_6/intermediate/dense/mul/x" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: 
"gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Shape" + input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Reshape_1" + input: "bert/encoder/layer_6/intermediate/dense/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/sub/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/sub" + op: "Sub" + input: "bert/encoder/layer_6/intermediate/dense/Pow/y" + input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/sub/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Pow" + op: "Pow" + input: "bert/encoder/layer_6/intermediate/dense/BiasAdd" + input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/mul" + input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Pow" + attr { + key: 
"T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/mul_1" + input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Sum" + input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Greater/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Greater" + op: "Greater" + input: "bert/encoder/layer_6/intermediate/dense/BiasAdd" + input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Greater/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/ones_like/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/ones_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/ones_like" + op: "Fill" + input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/ones_like/Shape" + input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/ones_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Select" + op: "Select" + input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Greater" + input: 
"bert/encoder/layer_6/intermediate/dense/BiasAdd" + input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/ones_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Log" + op: "Log" + input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/zeros_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/zeros_like" + op: "Fill" + input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor" + input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/zeros_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Select_1" + op: "Select" + input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Greater" + input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Log" + input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/zeros_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/mul_2" + op: "Mul" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Reshape_1" + input: "bert/encoder/layer_6/intermediate/dense/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/mul_3" + op: "Mul" + input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/mul_2" + input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Select_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Sum_1" + op: "Sum" + input: 
"gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/mul_3" + input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Sum_1" + input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/AddN_37" + op: "AddN" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_3_grad/Mul" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Reshape_1" + input: "gradients/bert/encoder/layer_6/intermediate/dense/Pow_grad/Reshape" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/intermediate/dense/mul_3_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/AddN_37" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/AddN_37" + input: "bert/encoder/layer_6/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_1" + input: "gradients/AddN_37" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_38" + op: "AddN" + input: "gradients/AddN_36" + input: "gradients/bert/encoder/layer_6/intermediate/dense/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_38" 
+ input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_38" + input: "bert/encoder/layer_6/attention/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_38" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_38" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 
768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: 
"gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_6/attention/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_39" + op: "AddN" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr 
{ + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_39" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_39" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + 
} + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + 
value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_6/attention/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + 
} + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: 
DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_6/attention/output/add" + input: "bert/encoder/layer_6/attention/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim 
{ + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + 
key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_6/attention/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + 
value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: 
"gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_40" + op: "AddN" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + 
value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_40" + input: "bert/encoder/layer_6/attention/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_40" + input: "bert/encoder/layer_6/attention/output/dropout/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Shape" + input: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_1_grad/Mul" + input: "bert/encoder/layer_6/attention/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Sum" + input: 
"gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_6/attention/output/dense/BiasAdd" + input: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_1_grad/Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Reshape" + input: "bert/encoder/layer_6/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_6/attention/self/Reshape_3" + input: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Reshape_3_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + 
key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Reshape_3_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/dense/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_6/attention/self/Reshape_3_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/transpose_3_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_6/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/transpose_3_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_6/attention/self/Reshape_3_grad/Reshape" + input: "gradients/bert/encoder/layer_6/attention/self/transpose_3_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/layer_6/attention/self/transpose_3_grad/transpose" + input: "bert/encoder/layer_6/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/layer_6/attention/self/dropout/mul_1" + input: "gradients/bert/encoder/layer_6/attention/self/transpose_3_grad/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + 
attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/strided_slice/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/strided_slice/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/strided_slice/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/strided_slice" + op: "StridedSlice" + input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/Shape" + input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/strided_slice/stack" + input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/strided_slice/stack_1" + input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/strided_slice/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 1 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/strided_slice_1/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/strided_slice_1/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/strided_slice_1/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { 
+ dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/strided_slice_1" + op: "StridedSlice" + input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/Shape_1" + input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/strided_slice_1/stack" + input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/strided_slice_1/stack_1" + input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/strided_slice_1/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 1 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/strided_slice" + input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/strided_slice_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/MatMul" + input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/Sum" + input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/MatMul_1" + input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/Sum_1" + input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/Shape_1" 
+ attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/Reshape" + input: "bert/encoder/layer_6/attention/self/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/Reshape" + input: "bert/encoder/layer_6/attention/self/dropout/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/transpose_2_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_6/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/transpose_2_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/Reshape_1" + input: "gradients/bert/encoder/layer_6/attention/self/transpose_2_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Shape" + input: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Mul" + op: 
"Mul" + input: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_1_grad/Mul" + input: "bert/encoder/layer_6/attention/self/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Sum" + input: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_6/attention/self/Softmax" + input: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_1_grad/Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Reshape_2_grad/Reshape" + op: "Reshape" + input: 
"gradients/bert/encoder/layer_6/attention/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/layer_6/attention/self/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Softmax_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Reshape" + input: "bert/encoder/layer_6/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Softmax_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Softmax_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_6/attention/self/Softmax_grad/mul" + input: "gradients/bert/encoder/layer_6/attention/self/Softmax_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Softmax_grad/sub" + op: "Sub" + input: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Reshape" + input: "gradients/bert/encoder/layer_6/attention/self/Softmax_grad/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Softmax_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_6/attention/self/Softmax_grad/sub" + input: "bert/encoder/layer_6/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/value/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_6/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " 
\000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\001\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_6/attention/self/add_grad/Shape" + input: "gradients/bert/encoder/layer_6/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_6/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_6/attention/self/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/self/add_grad/Sum" + input: "gradients/bert/encoder/layer_6/attention/self/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_6/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_6/attention/self/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/self/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_6/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/value/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_6/attention/self/Reshape_2_grad/Reshape" + input: "bert/encoder/layer_6/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { 
+ key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/value/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_6/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Shape" + input: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_6/attention/self/add_grad/Reshape" + input: "bert/encoder/layer_6/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Mul" + input: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Sum" + input: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + 
size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_6/attention/self/MatMul" + input: "gradients/bert/encoder/layer_6/attention/self/add_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/MatMul_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_6/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/MatMul_grad/MatMul_1" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_6/attention/self/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_6/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_6/attention/self/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_6/attention/self/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_6/attention/self/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_6/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_6/attention/self/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/layer_6/attention/self/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/self/transpose_grad/transpose" + input: "gradients/bert/encoder/layer_6/attention/self/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/layer_6/attention/self/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/query/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_6/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/key/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_6/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: 
"data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/query/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_6/attention/self/Reshape_grad/Reshape" + input: "bert/encoder/layer_6/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/query/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_6/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/key/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_6/attention/self/Reshape_1_grad/Reshape" + input: "bert/encoder/layer_6/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/key/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_6/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_41" + op: "AddN" + input: "gradients/AddN_40" + input: "gradients/bert/encoder/layer_6/attention/self/value/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_6/attention/self/query/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_6/attention/self/key/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 4 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_41" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_41" + input: "bert/encoder/layer_5/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } 
+ } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_41" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_41" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: 
"gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_5/output/LayerNorm/moments/mean" + input: 
"gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_42" + op: "AddN" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_42" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + 
type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_42" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + 
} + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: 
"_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_5/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: 
"_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape" + input: 
"gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + 
value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_5/output/add" + input: "bert/encoder/layer_5/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: 
"gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_5/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: 
"loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape" + 
input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_43" + op: "AddN" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/dropout/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_43" + input: "bert/encoder/layer_5/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/dropout/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_43" + input: "bert/encoder/layer_5/output/dropout/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Shape" + input: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_5/output/dropout/mul_1_grad/Mul" + input: "bert/encoder/layer_5/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_5/output/dropout/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Sum" + input: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_5/output/dense/BiasAdd" + input: "gradients/bert/encoder/layer_5/output/dropout/mul_1_grad/Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Reshape" + input: "bert/encoder/layer_5/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_5/intermediate/dense/mul_3" + input: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + 
type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_3_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_5/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_5/intermediate/dense/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_3_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_5/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_5/intermediate/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_3_grad/Mul_1" + input: "bert/encoder/layer_5/intermediate/dense/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/Sum" + input: 
"gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_5/intermediate/dense/mul_2/x" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_3_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/add_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/add_1_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/add_1_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_5/intermediate/dense/add_1_grad/Shape" + input: "gradients/bert/encoder/layer_5/intermediate/dense/add_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/add_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/Reshape_1" + input: "gradients/bert/encoder/layer_5/intermediate/dense/add_1_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false 
+ } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/add_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/intermediate/dense/add_1_grad/Sum" + input: "gradients/bert/encoder/layer_5/intermediate/dense/add_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/add_1_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_2_grad/Reshape_1" + input: "gradients/bert/encoder/layer_5/intermediate/dense/add_1_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/add_1_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/intermediate/dense/add_1_grad/Sum_1" + input: "gradients/bert/encoder/layer_5/intermediate/dense/add_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/Tanh_grad/TanhGrad" + op: "TanhGrad" + input: "bert/encoder/layer_5/intermediate/dense/Tanh" + input: "gradients/bert/encoder/layer_5/intermediate/dense/add_1_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Shape" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_5/intermediate/dense/Tanh_grad/TanhGrad" + input: "bert/encoder/layer_5/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + 
key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Sum" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_5/intermediate/dense/mul_1/x" + input: "gradients/bert/encoder/layer_5/intermediate/dense/Tanh_grad/TanhGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Sum_1" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: 
"gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Shape" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Reshape_1" + input: "bert/encoder/layer_5/intermediate/dense/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Mul" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Sum" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_5/intermediate/dense/mul/x" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 
2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Shape" + input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Reshape_1" + input: "bert/encoder/layer_5/intermediate/dense/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/sub/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/sub" + op: "Sub" + input: "bert/encoder/layer_5/intermediate/dense/Pow/y" + input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/sub/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Pow" + op: "Pow" + input: "bert/encoder/layer_5/intermediate/dense/BiasAdd" + input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/mul" + input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/mul_1" + input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Reshape" + op: "Reshape" + 
input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Sum" + input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Greater/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Greater" + op: "Greater" + input: "bert/encoder/layer_5/intermediate/dense/BiasAdd" + input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Greater/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/ones_like/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/ones_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/ones_like" + op: "Fill" + input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/ones_like/Shape" + input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/ones_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Select" + op: "Select" + input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Greater" + input: "bert/encoder/layer_5/intermediate/dense/BiasAdd" + input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/ones_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Log" + op: "Log" + input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + 
} + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/zeros_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/zeros_like" + op: "Fill" + input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor" + input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/zeros_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Select_1" + op: "Select" + input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Greater" + input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Log" + input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/zeros_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/mul_2" + op: "Mul" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Reshape_1" + input: "bert/encoder/layer_5/intermediate/dense/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/mul_3" + op: "Mul" + input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/mul_2" + input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Select_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/mul_3" + input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Sum_1" + input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/AddN_44" + op: "AddN" + input: 
"gradients/bert/encoder/layer_5/intermediate/dense/mul_3_grad/Mul" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Reshape_1" + input: "gradients/bert/encoder/layer_5/intermediate/dense/Pow_grad/Reshape" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/intermediate/dense/mul_3_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/AddN_44" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/AddN_44" + input: "bert/encoder/layer_5/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_1" + input: "gradients/AddN_44" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_45" + op: "AddN" + input: "gradients/AddN_43" + input: "gradients/bert/encoder/layer_5/intermediate/dense/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_45" + input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_45" + input: "bert/encoder/layer_5/attention/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { 
+ key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_45" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_45" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_5/attention/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value 
{ + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_46" + op: "AddN" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_46" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT 
+ } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_46" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } 
+ attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" 
+ value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_5/attention/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape 
{ + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: 
"gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: 
"\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_5/attention/output/add" + input: "bert/encoder/layer_5/attention/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + 
key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: 
"bert/encoder/layer_5/attention/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + 
list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { 
+ key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_47" + op: "AddN" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_47" + input: "bert/encoder/layer_5/attention/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_47" + input: "bert/encoder/layer_5/attention/output/dropout/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + 
name: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Shape" + input: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_1_grad/Mul" + input: "bert/encoder/layer_5/attention/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Sum" + input: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_5/attention/output/dense/BiasAdd" + input: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_1_grad/Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { 
+ type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Reshape" + input: "bert/encoder/layer_5/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_5/attention/self/Reshape_3" + input: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Reshape_3_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Reshape_3_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/dense/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_5/attention/self/Reshape_3_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/transpose_3_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_5/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + 
size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/transpose_3_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_5/attention/self/Reshape_3_grad/Reshape" + input: "gradients/bert/encoder/layer_5/attention/self/transpose_3_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/layer_5/attention/self/transpose_3_grad/transpose" + input: "bert/encoder/layer_5/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/layer_5/attention/self/dropout/mul_1" + input: "gradients/bert/encoder/layer_5/attention/self/transpose_3_grad/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/strided_slice/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/strided_slice/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -2 + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/strided_slice/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/strided_slice" + op: "StridedSlice" + input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/Shape" + input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/strided_slice/stack" + input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/strided_slice/stack_1" + input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/strided_slice/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 1 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/strided_slice_1/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/strided_slice_1/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/strided_slice_1/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/strided_slice_1" + op: "StridedSlice" + input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/Shape_1" + input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/strided_slice_1/stack" + input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/strided_slice_1/stack_1" + input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/strided_slice_1/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 1 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} 
+node { + name: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/strided_slice" + input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/strided_slice_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/MatMul" + input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/Sum" + input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/MatMul_1" + input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/Sum_1" + input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/Reshape" + input: "bert/encoder/layer_5/attention/self/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/Reshape" + input: "bert/encoder/layer_5/attention/self/dropout/mul" + attr { + key: "T" + value { + 
type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/transpose_2_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_5/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/transpose_2_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/Reshape_1" + input: "gradients/bert/encoder/layer_5/attention/self/transpose_2_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Shape" + input: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_1_grad/Mul" + input: "bert/encoder/layer_5/attention/self/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: 
"gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Sum" + input: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_5/attention/self/Softmax" + input: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_1_grad/Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/layer_5/attention/self/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Softmax_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Reshape" + input: "bert/encoder/layer_5/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Softmax_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + 
list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Softmax_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_5/attention/self/Softmax_grad/mul" + input: "gradients/bert/encoder/layer_5/attention/self/Softmax_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Softmax_grad/sub" + op: "Sub" + input: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Reshape" + input: "gradients/bert/encoder/layer_5/attention/self/Softmax_grad/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Softmax_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/self/Softmax_grad/sub" + input: "bert/encoder/layer_5/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/value/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_5/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\001\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_5/attention/self/add_grad/Shape" + input: "gradients/bert/encoder/layer_5/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_5/attention/self/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_5/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_5/attention/self/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/self/add_grad/Sum" + input: "gradients/bert/encoder/layer_5/attention/self/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_5/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_5/attention/self/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/self/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_5/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/value/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_5/attention/self/Reshape_2_grad/Reshape" + input: "bert/encoder/layer_5/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/value/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_5/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { 
+ type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Shape" + input: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/self/add_grad/Reshape" + input: "bert/encoder/layer_5/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Mul" + input: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Sum" + input: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_5/attention/self/MatMul" + input: "gradients/bert/encoder/layer_5/attention/self/add_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: 
false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/MatMul_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_5/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/MatMul_grad/MatMul_1" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_5/attention/self/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_5/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_5/attention/self/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_5/attention/self/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_5/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_5/attention/self/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/layer_5/attention/self/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + 
value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/self/transpose_grad/transpose" + input: "gradients/bert/encoder/layer_5/attention/self/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/layer_5/attention/self/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/query/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_5/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/key/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_5/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/query/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_5/attention/self/Reshape_grad/Reshape" + input: "bert/encoder/layer_5/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/query/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_5/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + 
attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/key/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_5/attention/self/Reshape_1_grad/Reshape" + input: "bert/encoder/layer_5/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/key/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_5/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_48" + op: "AddN" + input: "gradients/AddN_47" + input: "gradients/bert/encoder/layer_5/attention/self/value/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_5/attention/self/query/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_5/attention/self/key/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 4 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_48" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_48" + input: "bert/encoder/layer_4/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_48" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_48" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + 
} + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_4/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: 
"gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_49" + op: "AddN" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_49" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" 
+ value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_49" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: 
"gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_4/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape_1" + input: 
"gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/truediv" + 
attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_4/output/add" + input: "bert/encoder/layer_4/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_4/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 
1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_50" + op: "AddN" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/dropout/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_50" + input: "bert/encoder/layer_4/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/dropout/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_50" + input: "bert/encoder/layer_4/output/dropout/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Shape" + input: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_4/output/dropout/mul_1_grad/Mul" + input: "bert/encoder/layer_4/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Sum" + input: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim 
{ + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_4/output/dense/BiasAdd" + input: "gradients/bert/encoder/layer_4/output/dropout/mul_1_grad/Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Reshape" + input: "bert/encoder/layer_4/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_4/intermediate/dense/mul_3" + input: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_3_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_4/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_4/intermediate/dense/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_3_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_4/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_4/intermediate/dense/BiasAdd" + attr { + key: "T" + value { + type: 
DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_3_grad/Mul_1" + input: "bert/encoder/layer_4/intermediate/dense/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_4/intermediate/dense/mul_2/x" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_3_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + 
} + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/add_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/add_1_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/add_1_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_4/intermediate/dense/add_1_grad/Shape" + input: "gradients/bert/encoder/layer_4/intermediate/dense/add_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/add_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/Reshape_1" + input: "gradients/bert/encoder/layer_4/intermediate/dense/add_1_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/add_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/intermediate/dense/add_1_grad/Sum" + input: "gradients/bert/encoder/layer_4/intermediate/dense/add_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/add_1_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_2_grad/Reshape_1" + input: "gradients/bert/encoder/layer_4/intermediate/dense/add_1_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" 
+ value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/add_1_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/intermediate/dense/add_1_grad/Sum_1" + input: "gradients/bert/encoder/layer_4/intermediate/dense/add_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/Tanh_grad/TanhGrad" + op: "TanhGrad" + input: "bert/encoder/layer_4/intermediate/dense/Tanh" + input: "gradients/bert/encoder/layer_4/intermediate/dense/add_1_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Shape" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_4/intermediate/dense/Tanh_grad/TanhGrad" + input: "bert/encoder/layer_4/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Sum" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + 
attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_4/intermediate/dense/mul_1/x" + input: "gradients/bert/encoder/layer_4/intermediate/dense/Tanh_grad/TanhGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Sum_1" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Shape" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Reshape_1" + input: "bert/encoder/layer_4/intermediate/dense/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Mul" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { 
+ type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Sum" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_4/intermediate/dense/mul/x" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Shape" + input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/mul" + op: "Mul" + input: 
"gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Reshape_1" + input: "bert/encoder/layer_4/intermediate/dense/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/sub/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/sub" + op: "Sub" + input: "bert/encoder/layer_4/intermediate/dense/Pow/y" + input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/sub/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Pow" + op: "Pow" + input: "bert/encoder/layer_4/intermediate/dense/BiasAdd" + input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/mul" + input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/mul_1" + input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Sum" + input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Greater/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Greater" + op: "Greater" + input: "bert/encoder/layer_4/intermediate/dense/BiasAdd" + input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Greater/y" + attr { + key: "T" + value { + 
type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/ones_like/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/ones_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/ones_like" + op: "Fill" + input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/ones_like/Shape" + input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/ones_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Select" + op: "Select" + input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Greater" + input: "bert/encoder/layer_4/intermediate/dense/BiasAdd" + input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/ones_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Log" + op: "Log" + input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/zeros_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/zeros_like" + op: "Fill" + input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor" + input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/zeros_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim 
{ + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Select_1" + op: "Select" + input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Greater" + input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Log" + input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/zeros_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/mul_2" + op: "Mul" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Reshape_1" + input: "bert/encoder/layer_4/intermediate/dense/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/mul_3" + op: "Mul" + input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/mul_2" + input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Select_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/mul_3" + input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Sum_1" + input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/AddN_51" + op: "AddN" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_3_grad/Mul" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Reshape_1" + input: "gradients/bert/encoder/layer_4/intermediate/dense/Pow_grad/Reshape" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/intermediate/dense/mul_3_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/AddN_51" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: 
"gradients/bert/encoder/layer_4/intermediate/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/AddN_51" + input: "bert/encoder/layer_4/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_1" + input: "gradients/AddN_51" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_52" + op: "AddN" + input: "gradients/AddN_50" + input: "gradients/bert/encoder/layer_4/intermediate/dense/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_52" + input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_52" + input: "bert/encoder/layer_4/attention/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + 
attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_52" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_52" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: 
"\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_4/attention/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: 
"gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_53" + op: "AddN" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_53" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + 
input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_53" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } 
+ } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_4/attention/output/LayerNorm/moments/variance/reduction_indices" + 
input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + 
s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + 
} + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_4/attention/output/add" + input: "bert/encoder/layer_4/attention/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + 
op: "Sum" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_4/attention/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + 
key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Reshape" + input: 
"gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_54" + op: "AddN" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_54" + input: "bert/encoder/layer_4/attention/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_54" + input: "bert/encoder/layer_4/attention/output/dropout/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Shape" + input: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_1_grad/Mul" + input: "bert/encoder/layer_4/attention/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Sum" + input: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_4/attention/output/dense/BiasAdd" + input: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_1_grad/Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/dense/BiasAdd_grad/BiasAddGrad" + op: 
"BiasAddGrad" + input: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Reshape" + input: "bert/encoder/layer_4/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_4/attention/self/Reshape_3" + input: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Reshape_3_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Reshape_3_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/output/dense/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_4/attention/self/Reshape_3_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/transpose_3_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_4/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/transpose_3_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_4/attention/self/Reshape_3_grad/Reshape" + input: "gradients/bert/encoder/layer_4/attention/self/transpose_3_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/layer_4/attention/self/transpose_3_grad/transpose" + input: 
"bert/encoder/layer_4/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/layer_4/attention/self/dropout/mul_1" + input: "gradients/bert/encoder/layer_4/attention/self/transpose_3_grad/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/strided_slice/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/strided_slice/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/strided_slice/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/strided_slice" + op: "StridedSlice" + input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/Shape" + input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/strided_slice/stack" + input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/strided_slice/stack_1" + input: 
"gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/strided_slice/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 1 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/strided_slice_1/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/strided_slice_1/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/strided_slice_1/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/strided_slice_1" + op: "StridedSlice" + input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/Shape_1" + input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/strided_slice_1/stack" + input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/strided_slice_1/stack_1" + input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/strided_slice_1/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 1 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/strided_slice" + input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/strided_slice_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/MatMul" + input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/BroadcastGradientArgs" + attr { + 
key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/Sum" + input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/MatMul_1" + input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/Sum_1" + input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/Reshape" + input: "bert/encoder/layer_4/attention/self/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/Reshape" + input: "bert/encoder/layer_4/attention/self/dropout/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/transpose_2_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_4/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/transpose_2_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/Reshape_1" + input: 
"gradients/bert/encoder/layer_4/attention/self/transpose_2_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Shape" + input: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_1_grad/Mul" + input: "bert/encoder/layer_4/attention/self/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Sum" + input: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_4/attention/self/Softmax" + input: 
"gradients/bert/encoder/layer_4/attention/self/dropout/mul_1_grad/Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/layer_4/attention/self/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Softmax_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Reshape" + input: "bert/encoder/layer_4/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Softmax_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Softmax_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_4/attention/self/Softmax_grad/mul" + input: "gradients/bert/encoder/layer_4/attention/self/Softmax_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 
128 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Softmax_grad/sub" + op: "Sub" + input: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Reshape" + input: "gradients/bert/encoder/layer_4/attention/self/Softmax_grad/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Softmax_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_4/attention/self/Softmax_grad/sub" + input: "bert/encoder/layer_4/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/value/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_4/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\001\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_4/attention/self/add_grad/Shape" + input: "gradients/bert/encoder/layer_4/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_4/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_4/attention/self/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/add_grad/Reshape" + op: "Reshape" + input: 
"gradients/bert/encoder/layer_4/attention/self/add_grad/Sum" + input: "gradients/bert/encoder/layer_4/attention/self/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_4/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_4/attention/self/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/self/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_4/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/value/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_4/attention/self/Reshape_2_grad/Reshape" + input: "bert/encoder/layer_4/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/value/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_4/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/BroadcastGradientArgs" + op: 
"BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Shape" + input: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_4/attention/self/add_grad/Reshape" + input: "bert/encoder/layer_4/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Mul" + input: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Sum" + input: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_4/attention/self/MatMul" + input: "gradients/bert/encoder/layer_4/attention/self/add_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/MatMul_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Reshape" + input: 
"bert/encoder/layer_4/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/MatMul_grad/MatMul_1" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_4/attention/self/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_4/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_4/attention/self/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_4/attention/self/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_4/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_4/attention/self/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/layer_4/attention/self/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/self/transpose_grad/transpose" + input: "gradients/bert/encoder/layer_4/attention/self/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { 
+ type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/layer_4/attention/self/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/query/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_4/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/key/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_4/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/query/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_4/attention/self/Reshape_grad/Reshape" + input: "bert/encoder/layer_4/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/query/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_4/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/key/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_4/attention/self/Reshape_1_grad/Reshape" + input: "bert/encoder/layer_4/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: 
true + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/key/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_4/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_55" + op: "AddN" + input: "gradients/AddN_54" + input: "gradients/bert/encoder/layer_4/attention/self/value/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_4/attention/self/query/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_4/attention/self/key/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 4 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_55" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_55" + input: "bert/encoder/layer_3/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_55" + input: 
"gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_55" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_3/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_56" + op: "AddN" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { 
+ key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_56" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_56" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: 
"gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/Shape" + 
attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_3/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: 
"loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/mod" + input: 
"gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value 
{ + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_3/output/add" + input: "bert/encoder/layer_3/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: 
"Mul" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} 
+node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_3/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + 
type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Maximum" + 
attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_57" + op: "AddN" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/dropout/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_57" + input: "bert/encoder/layer_3/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/dropout/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_57" + input: "bert/encoder/layer_3/output/dropout/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + 
} + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Shape" + input: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_3/output/dropout/mul_1_grad/Mul" + input: "bert/encoder/layer_3/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Sum" + input: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_3/output/dense/BiasAdd" + input: "gradients/bert/encoder/layer_3/output/dropout/mul_1_grad/Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: 
"keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Reshape" + input: "bert/encoder/layer_3/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_3/intermediate/dense/mul_3" + input: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_3_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_3/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_3/intermediate/dense/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_3_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_3/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_3/intermediate/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + 
} + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_3_grad/Mul_1" + input: "bert/encoder/layer_3/intermediate/dense/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_3/intermediate/dense/mul_2/x" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_3_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/add_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + 
} + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/add_1_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/add_1_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_3/intermediate/dense/add_1_grad/Shape" + input: "gradients/bert/encoder/layer_3/intermediate/dense/add_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/add_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/Reshape_1" + input: "gradients/bert/encoder/layer_3/intermediate/dense/add_1_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/add_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/intermediate/dense/add_1_grad/Sum" + input: "gradients/bert/encoder/layer_3/intermediate/dense/add_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/add_1_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_2_grad/Reshape_1" + input: "gradients/bert/encoder/layer_3/intermediate/dense/add_1_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/add_1_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/intermediate/dense/add_1_grad/Sum_1" + input: "gradients/bert/encoder/layer_3/intermediate/dense/add_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/Tanh_grad/TanhGrad" + op: "TanhGrad" + input: "bert/encoder/layer_3/intermediate/dense/Tanh" + input: "gradients/bert/encoder/layer_3/intermediate/dense/add_1_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + 
dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Shape" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_3/intermediate/dense/Tanh_grad/TanhGrad" + input: "bert/encoder/layer_3/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Sum" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_3/intermediate/dense/mul_1/x" + input: "gradients/bert/encoder/layer_3/intermediate/dense/Tanh_grad/TanhGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { 
+ list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Sum_1" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Shape" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Reshape_1" + input: "bert/encoder/layer_3/intermediate/dense/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Mul" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Sum" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_3/intermediate/dense/mul/x" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Reshape_1" + attr { + key: "T" + 
value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Shape" + input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Reshape_1" + input: "bert/encoder/layer_3/intermediate/dense/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/sub/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/sub" + op: "Sub" + input: "bert/encoder/layer_3/intermediate/dense/Pow/y" + input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/sub/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr 
{ + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Pow" + op: "Pow" + input: "bert/encoder/layer_3/intermediate/dense/BiasAdd" + input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/mul" + input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/mul_1" + input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Sum" + input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Greater/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Greater" + op: "Greater" + input: "bert/encoder/layer_3/intermediate/dense/BiasAdd" + input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Greater/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/ones_like/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/ones_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} 
+node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/ones_like" + op: "Fill" + input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/ones_like/Shape" + input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/ones_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Select" + op: "Select" + input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Greater" + input: "bert/encoder/layer_3/intermediate/dense/BiasAdd" + input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/ones_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Log" + op: "Log" + input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/zeros_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/zeros_like" + op: "Fill" + input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor" + input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/zeros_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Select_1" + op: "Select" + input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Greater" + input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Log" + input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/zeros_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/mul_2" + op: "Mul" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Reshape_1" + input: "bert/encoder/layer_3/intermediate/dense/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/mul_3" + op: "Mul" + input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/mul_2" + input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Select_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/mul_3" + input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Sum_1" + input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/AddN_58" + op: "AddN" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_3_grad/Mul" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Reshape_1" + input: "gradients/bert/encoder/layer_3/intermediate/dense/Pow_grad/Reshape" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/intermediate/dense/mul_3_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/AddN_58" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/AddN_58" + input: "bert/encoder/layer_3/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_1" + input: "gradients/AddN_58" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: 
"gradients/AddN_59" + op: "AddN" + input: "gradients/AddN_57" + input: "gradients/bert/encoder/layer_3/intermediate/dense/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_59" + input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_59" + input: "bert/encoder/layer_3/attention/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_59" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + 
type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_59" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + 
dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_3/attention/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_60" + op: "AddN" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_60" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_60" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: 
"gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/Sum" 
+ input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_3/attention/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape 
{ + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: 
"gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: 
"gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_3/attention/output/add" + input: "bert/encoder/layer_3/attention/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + 
attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_3/attention/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: 
"loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Tile" + input: 
"gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_61" + op: "AddN" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_61" + input: "bert/encoder/layer_3/attention/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_61" + input: "bert/encoder/layer_3/attention/output/dropout/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Shape" + input: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_1_grad/Mul" + input: "bert/encoder/layer_3/attention/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Sum" + input: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_3/attention/output/dense/BiasAdd" + input: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_1_grad/Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Reshape" + input: "bert/encoder/layer_3/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: 
"gradients/bert/encoder/layer_3/attention/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_3/attention/self/Reshape_3" + input: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Reshape_3_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Reshape_3_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/dense/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_3/attention/self/Reshape_3_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/transpose_3_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_3/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/transpose_3_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_3/attention/self/Reshape_3_grad/Reshape" + input: "gradients/bert/encoder/layer_3/attention/self/transpose_3_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/layer_3/attention/self/transpose_3_grad/transpose" + input: "bert/encoder/layer_3/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/layer_3/attention/self/dropout/mul_1" + input: "gradients/bert/encoder/layer_3/attention/self/transpose_3_grad/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + 
value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/strided_slice/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/strided_slice/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/strided_slice/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/strided_slice" + op: "StridedSlice" + input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/Shape" + input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/strided_slice/stack" + input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/strided_slice/stack_1" + input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/strided_slice/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 1 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/strided_slice_1/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 
1 + } + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/strided_slice_1/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/strided_slice_1/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/strided_slice_1" + op: "StridedSlice" + input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/Shape_1" + input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/strided_slice_1/stack" + input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/strided_slice_1/stack_1" + input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/strided_slice_1/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 1 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/strided_slice" + input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/strided_slice_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/MatMul" + input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/Sum" + input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/MatMul_1" + input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/Sum_1" + input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/Reshape" + input: "bert/encoder/layer_3/attention/self/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/Reshape" + input: "bert/encoder/layer_3/attention/self/dropout/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/transpose_2_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_3/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/transpose_2_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/Reshape_1" + input: "gradients/bert/encoder/layer_3/attention/self/transpose_2_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Shape_1" 
+ op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Shape" + input: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_1_grad/Mul" + input: "bert/encoder/layer_3/attention/self/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Sum" + input: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_3/attention/self/Softmax" + input: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_1_grad/Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Reshape_1" + op: "Reshape" + input: 
"gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/layer_3/attention/self/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Softmax_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Reshape" + input: "bert/encoder/layer_3/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Softmax_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Softmax_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_3/attention/self/Softmax_grad/mul" + input: "gradients/bert/encoder/layer_3/attention/self/Softmax_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Softmax_grad/sub" + op: "Sub" + input: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Reshape" + input: "gradients/bert/encoder/layer_3/attention/self/Softmax_grad/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Softmax_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/self/Softmax_grad/sub" + input: "bert/encoder/layer_3/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 
+ } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/value/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_3/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\001\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_3/attention/self/add_grad/Shape" + input: "gradients/bert/encoder/layer_3/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_3/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_3/attention/self/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/self/add_grad/Sum" + input: "gradients/bert/encoder/layer_3/attention/self/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_3/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_3/attention/self/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: 
"keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/self/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_3/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/value/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_3/attention/self/Reshape_2_grad/Reshape" + input: "bert/encoder/layer_3/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/value/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_3/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Shape" + input: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/self/add_grad/Reshape" + input: "bert/encoder/layer_3/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Sum" + op: "Sum" + input: 
"gradients/bert/encoder/layer_3/attention/self/Mul_grad/Mul" + input: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Sum" + input: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_3/attention/self/MatMul" + input: "gradients/bert/encoder/layer_3/attention/self/add_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/MatMul_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_3/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/MatMul_grad/MatMul_1" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_3/attention/self/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } 
+ } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_3/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_3/attention/self/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_3/attention/self/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_3/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_3/attention/self/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/layer_3/attention/self/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/self/transpose_grad/transpose" + input: "gradients/bert/encoder/layer_3/attention/self/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/layer_3/attention/self/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: 
DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/query/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_3/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/key/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_3/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/query/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_3/attention/self/Reshape_grad/Reshape" + input: "bert/encoder/layer_3/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/query/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_3/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/key/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_3/attention/self/Reshape_1_grad/Reshape" + input: "bert/encoder/layer_3/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/key/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_3/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_62" + op: "AddN" + input: "gradients/AddN_61" + input: "gradients/bert/encoder/layer_3/attention/self/value/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_3/attention/self/query/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_3/attention/self/key/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 4 + } + } + attr { + key: 
"T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_62" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_62" + input: "bert/encoder/layer_2/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_62" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_62" + input: 
"gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + 
value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_2/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_63" + op: "AddN" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { 
+ tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_63" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_63" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: 
"gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_2/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + 
key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + 
dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_2/output/add" + input: "bert/encoder/layer_2/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 
768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_2/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Fill" + op: 
"Fill" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_64" + op: "AddN" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/dropout/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_64" + input: "bert/encoder/layer_2/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/dropout/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_64" + input: "bert/encoder/layer_2/output/dropout/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_2/output/dropout/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Shape" + input: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_2/output/dropout/mul_1_grad/Mul" + input: "bert/encoder/layer_2/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Sum" + input: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_2/output/dense/BiasAdd" + input: "gradients/bert/encoder/layer_2/output/dropout/mul_1_grad/Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } 
+ } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Reshape" + input: "bert/encoder/layer_2/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_2/intermediate/dense/mul_3" + input: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_3_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_2/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_2/intermediate/dense/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_3_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_2/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_2/intermediate/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_3_grad/Mul_1" + input: "bert/encoder/layer_2/intermediate/dense/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_2/intermediate/dense/mul_2/x" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_3_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/add_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/add_1_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/add_1_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: 
"gradients/bert/encoder/layer_2/intermediate/dense/add_1_grad/Shape" + input: "gradients/bert/encoder/layer_2/intermediate/dense/add_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/add_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/Reshape_1" + input: "gradients/bert/encoder/layer_2/intermediate/dense/add_1_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/add_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/intermediate/dense/add_1_grad/Sum" + input: "gradients/bert/encoder/layer_2/intermediate/dense/add_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/add_1_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_2_grad/Reshape_1" + input: "gradients/bert/encoder/layer_2/intermediate/dense/add_1_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/add_1_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/intermediate/dense/add_1_grad/Sum_1" + input: "gradients/bert/encoder/layer_2/intermediate/dense/add_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/Tanh_grad/TanhGrad" + op: "TanhGrad" + input: "bert/encoder/layer_2/intermediate/dense/Tanh" + input: "gradients/bert/encoder/layer_2/intermediate/dense/add_1_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } 
+ } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Shape" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_2/intermediate/dense/Tanh_grad/TanhGrad" + input: "bert/encoder/layer_2/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Sum" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_2/intermediate/dense/mul_1/x" + input: "gradients/bert/encoder/layer_2/intermediate/dense/Tanh_grad/TanhGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Sum_1" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Shape" + op: "Const" + attr { + 
key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Shape" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Reshape_1" + input: "bert/encoder/layer_2/intermediate/dense/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Mul" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Sum" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_2/intermediate/dense/mul/x" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: 
"gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Shape" + input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Reshape_1" + input: "bert/encoder/layer_2/intermediate/dense/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/sub/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/sub" + op: "Sub" + input: "bert/encoder/layer_2/intermediate/dense/Pow/y" + input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/sub/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Pow" + op: "Pow" + input: "bert/encoder/layer_2/intermediate/dense/BiasAdd" + input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/mul" + input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Pow" + attr { + key: 
"T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/mul_1" + input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Sum" + input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Greater/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Greater" + op: "Greater" + input: "bert/encoder/layer_2/intermediate/dense/BiasAdd" + input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Greater/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/ones_like/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/ones_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/ones_like" + op: "Fill" + input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/ones_like/Shape" + input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/ones_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Select" + op: "Select" + input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Greater" + input: 
"bert/encoder/layer_2/intermediate/dense/BiasAdd" + input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/ones_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Log" + op: "Log" + input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/zeros_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/zeros_like" + op: "Fill" + input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor" + input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/zeros_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Select_1" + op: "Select" + input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Greater" + input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Log" + input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/zeros_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/mul_2" + op: "Mul" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Reshape_1" + input: "bert/encoder/layer_2/intermediate/dense/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/mul_3" + op: "Mul" + input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/mul_2" + input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Select_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Sum_1" + op: "Sum" + input: 
"gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/mul_3" + input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Sum_1" + input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/AddN_65" + op: "AddN" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_3_grad/Mul" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Reshape_1" + input: "gradients/bert/encoder/layer_2/intermediate/dense/Pow_grad/Reshape" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/intermediate/dense/mul_3_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/AddN_65" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/AddN_65" + input: "bert/encoder/layer_2/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_1" + input: "gradients/AddN_65" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_66" + op: "AddN" + input: "gradients/AddN_64" + input: "gradients/bert/encoder/layer_2/intermediate/dense/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_66" 
+ input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_66" + input: "bert/encoder/layer_2/attention/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_66" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_66" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 
768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: 
"gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_2/attention/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_67" + op: "AddN" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr 
{ + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_67" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_67" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + 
} + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + 
value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_2/attention/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + 
} + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: 
DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_2/attention/output/add" + input: "bert/encoder/layer_2/attention/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim 
{ + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + 
key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_2/attention/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + 
value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: 
"gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_68" + op: "AddN" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + 
value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_68" + input: "bert/encoder/layer_2/attention/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_68" + input: "bert/encoder/layer_2/attention/output/dropout/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Shape" + input: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_1_grad/Mul" + input: "bert/encoder/layer_2/attention/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Sum" + input: 
"gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_2/attention/output/dense/BiasAdd" + input: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_1_grad/Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Reshape" + input: "bert/encoder/layer_2/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_2/attention/self/Reshape_3" + input: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Reshape_3_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + 
key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Reshape_3_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/dense/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_2/attention/self/Reshape_3_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/transpose_3_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_2/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/transpose_3_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_2/attention/self/Reshape_3_grad/Reshape" + input: "gradients/bert/encoder/layer_2/attention/self/transpose_3_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/layer_2/attention/self/transpose_3_grad/transpose" + input: "bert/encoder/layer_2/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/layer_2/attention/self/dropout/mul_1" + input: "gradients/bert/encoder/layer_2/attention/self/transpose_3_grad/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + 
attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/strided_slice/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/strided_slice/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/strided_slice/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/strided_slice" + op: "StridedSlice" + input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/Shape" + input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/strided_slice/stack" + input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/strided_slice/stack_1" + input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/strided_slice/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 1 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/strided_slice_1/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/strided_slice_1/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/strided_slice_1/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { 
+ dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/strided_slice_1" + op: "StridedSlice" + input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/Shape_1" + input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/strided_slice_1/stack" + input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/strided_slice_1/stack_1" + input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/strided_slice_1/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 1 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/strided_slice" + input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/strided_slice_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/MatMul" + input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/Sum" + input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/MatMul_1" + input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/Sum_1" + input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/Shape_1" 
+ attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/Reshape" + input: "bert/encoder/layer_2/attention/self/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/Reshape" + input: "bert/encoder/layer_2/attention/self/dropout/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/transpose_2_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_2/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/transpose_2_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/Reshape_1" + input: "gradients/bert/encoder/layer_2/attention/self/transpose_2_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Shape" + input: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Mul" + op: 
"Mul" + input: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_1_grad/Mul" + input: "bert/encoder/layer_2/attention/self/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Sum" + input: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_2/attention/self/Softmax" + input: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_1_grad/Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Reshape_2_grad/Reshape" + op: "Reshape" + input: 
"gradients/bert/encoder/layer_2/attention/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/layer_2/attention/self/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Softmax_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Reshape" + input: "bert/encoder/layer_2/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Softmax_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Softmax_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_2/attention/self/Softmax_grad/mul" + input: "gradients/bert/encoder/layer_2/attention/self/Softmax_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Softmax_grad/sub" + op: "Sub" + input: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Reshape" + input: "gradients/bert/encoder/layer_2/attention/self/Softmax_grad/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Softmax_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_2/attention/self/Softmax_grad/sub" + input: "bert/encoder/layer_2/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/value/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_2/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " 
\000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\001\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_2/attention/self/add_grad/Shape" + input: "gradients/bert/encoder/layer_2/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_2/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_2/attention/self/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/self/add_grad/Sum" + input: "gradients/bert/encoder/layer_2/attention/self/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_2/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_2/attention/self/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/self/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_2/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/value/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_2/attention/self/Reshape_2_grad/Reshape" + input: "bert/encoder/layer_2/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { 
+ key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/value/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_2/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Shape" + input: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_2/attention/self/add_grad/Reshape" + input: "bert/encoder/layer_2/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Mul" + input: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Sum" + input: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + 
size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_2/attention/self/MatMul" + input: "gradients/bert/encoder/layer_2/attention/self/add_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/MatMul_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_2/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/MatMul_grad/MatMul_1" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_2/attention/self/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_2/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_2/attention/self/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_2/attention/self/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_2/attention/self/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_2/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_2/attention/self/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/layer_2/attention/self/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/self/transpose_grad/transpose" + input: "gradients/bert/encoder/layer_2/attention/self/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/layer_2/attention/self/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/query/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_2/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/key/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_2/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: 
"data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/query/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_2/attention/self/Reshape_grad/Reshape" + input: "bert/encoder/layer_2/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/query/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_2/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/key/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_2/attention/self/Reshape_1_grad/Reshape" + input: "bert/encoder/layer_2/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/key/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_2/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_69" + op: "AddN" + input: "gradients/AddN_68" + input: "gradients/bert/encoder/layer_2/attention/self/value/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_2/attention/self/query/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_2/attention/self/key/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 4 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_69" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_69" + input: "bert/encoder/layer_1/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } 
+ } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_69" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_69" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: 
"gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_1/output/LayerNorm/moments/mean" + input: 
"gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_70" + op: "AddN" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_70" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + 
type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_70" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + 
} + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: 
"_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_1/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: 
"_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape" + input: 
"gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + 
value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_1/output/add" + input: "bert/encoder/layer_1/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: 
"gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_1/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: 
"loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape" + 
input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_71" + op: "AddN" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/dropout/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_71" + input: "bert/encoder/layer_1/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/dropout/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_71" + input: "bert/encoder/layer_1/output/dropout/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Shape" + input: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_1/output/dropout/mul_1_grad/Mul" + input: "bert/encoder/layer_1/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_1/output/dropout/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Sum" + input: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_1/output/dense/BiasAdd" + input: "gradients/bert/encoder/layer_1/output/dropout/mul_1_grad/Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Reshape" + input: "bert/encoder/layer_1/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_1/intermediate/dense/mul_3" + input: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + 
type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_3_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_1/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_1/intermediate/dense/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_3_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_1/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_1/intermediate/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_3_grad/Mul_1" + input: "bert/encoder/layer_1/intermediate/dense/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/Sum" + input: 
"gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_1/intermediate/dense/mul_2/x" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_3_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/add_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/add_1_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/add_1_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_1/intermediate/dense/add_1_grad/Shape" + input: "gradients/bert/encoder/layer_1/intermediate/dense/add_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/add_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/Reshape_1" + input: "gradients/bert/encoder/layer_1/intermediate/dense/add_1_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false 
+ } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/add_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/intermediate/dense/add_1_grad/Sum" + input: "gradients/bert/encoder/layer_1/intermediate/dense/add_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/add_1_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_2_grad/Reshape_1" + input: "gradients/bert/encoder/layer_1/intermediate/dense/add_1_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/add_1_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/intermediate/dense/add_1_grad/Sum_1" + input: "gradients/bert/encoder/layer_1/intermediate/dense/add_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/Tanh_grad/TanhGrad" + op: "TanhGrad" + input: "bert/encoder/layer_1/intermediate/dense/Tanh" + input: "gradients/bert/encoder/layer_1/intermediate/dense/add_1_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Shape" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_1/intermediate/dense/Tanh_grad/TanhGrad" + input: "bert/encoder/layer_1/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + 
key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Sum" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_1/intermediate/dense/mul_1/x" + input: "gradients/bert/encoder/layer_1/intermediate/dense/Tanh_grad/TanhGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Sum_1" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: 
"gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Shape" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Reshape_1" + input: "bert/encoder/layer_1/intermediate/dense/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Mul" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Sum" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_1/intermediate/dense/mul/x" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 
2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Shape" + input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Reshape_1" + input: "bert/encoder/layer_1/intermediate/dense/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/sub/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/sub" + op: "Sub" + input: "bert/encoder/layer_1/intermediate/dense/Pow/y" + input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/sub/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Pow" + op: "Pow" + input: "bert/encoder/layer_1/intermediate/dense/BiasAdd" + input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/mul" + input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/mul_1" + input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Reshape" + op: "Reshape" + 
input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Sum" + input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Greater/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Greater" + op: "Greater" + input: "bert/encoder/layer_1/intermediate/dense/BiasAdd" + input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Greater/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/ones_like/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/ones_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/ones_like" + op: "Fill" + input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/ones_like/Shape" + input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/ones_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Select" + op: "Select" + input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Greater" + input: "bert/encoder/layer_1/intermediate/dense/BiasAdd" + input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/ones_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Log" + op: "Log" + input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + 
} + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/zeros_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/zeros_like" + op: "Fill" + input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor" + input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/zeros_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Select_1" + op: "Select" + input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Greater" + input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Log" + input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/zeros_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/mul_2" + op: "Mul" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Reshape_1" + input: "bert/encoder/layer_1/intermediate/dense/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/mul_3" + op: "Mul" + input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/mul_2" + input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Select_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/mul_3" + input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Sum_1" + input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/AddN_72" + op: "AddN" + input: 
"gradients/bert/encoder/layer_1/intermediate/dense/mul_3_grad/Mul" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Reshape_1" + input: "gradients/bert/encoder/layer_1/intermediate/dense/Pow_grad/Reshape" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/intermediate/dense/mul_3_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/AddN_72" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/AddN_72" + input: "bert/encoder/layer_1/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_1" + input: "gradients/AddN_72" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_73" + op: "AddN" + input: "gradients/AddN_71" + input: "gradients/bert/encoder/layer_1/intermediate/dense/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_73" + input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_73" + input: "bert/encoder/layer_1/attention/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { 
+ key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_73" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_73" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_1/attention/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value 
{ + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_74" + op: "AddN" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_74" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT 
+ } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_74" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } 
+ attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" 
+ value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_1/attention/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape 
{ + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: 
"gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: 
"\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_1/attention/output/add" + input: "bert/encoder/layer_1/attention/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + 
key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: 
"bert/encoder/layer_1/attention/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + 
list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { 
+ key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_75" + op: "AddN" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_75" + input: "bert/encoder/layer_1/attention/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_75" + input: "bert/encoder/layer_1/attention/output/dropout/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + 
name: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Shape" + input: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_1_grad/Mul" + input: "bert/encoder/layer_1/attention/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Sum" + input: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_1/attention/output/dense/BiasAdd" + input: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_1_grad/Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { 
+ type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Reshape" + input: "bert/encoder/layer_1/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_1/attention/self/Reshape_3" + input: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Reshape_3_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Reshape_3_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/dense/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_1/attention/self/Reshape_3_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/transpose_3_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_1/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + 
size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/transpose_3_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_1/attention/self/Reshape_3_grad/Reshape" + input: "gradients/bert/encoder/layer_1/attention/self/transpose_3_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/layer_1/attention/self/transpose_3_grad/transpose" + input: "bert/encoder/layer_1/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/layer_1/attention/self/dropout/mul_1" + input: "gradients/bert/encoder/layer_1/attention/self/transpose_3_grad/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/strided_slice/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/strided_slice/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -2 + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/strided_slice/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/strided_slice" + op: "StridedSlice" + input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/Shape" + input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/strided_slice/stack" + input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/strided_slice/stack_1" + input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/strided_slice/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 1 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/strided_slice_1/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/strided_slice_1/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/strided_slice_1/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/strided_slice_1" + op: "StridedSlice" + input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/Shape_1" + input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/strided_slice_1/stack" + input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/strided_slice_1/stack_1" + input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/strided_slice_1/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 1 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} 
+node { + name: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/strided_slice" + input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/strided_slice_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/MatMul" + input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/Sum" + input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/MatMul_1" + input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/Sum_1" + input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/Reshape" + input: "bert/encoder/layer_1/attention/self/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/Reshape" + input: "bert/encoder/layer_1/attention/self/dropout/mul" + attr { + key: "T" + value { + 
type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/transpose_2_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_1/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/transpose_2_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/Reshape_1" + input: "gradients/bert/encoder/layer_1/attention/self/transpose_2_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Shape" + input: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_1_grad/Mul" + input: "bert/encoder/layer_1/attention/self/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: 
"gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Sum" + input: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_1/attention/self/Softmax" + input: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_1_grad/Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/layer_1/attention/self/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Softmax_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Reshape" + input: "bert/encoder/layer_1/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Softmax_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + 
list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Softmax_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_1/attention/self/Softmax_grad/mul" + input: "gradients/bert/encoder/layer_1/attention/self/Softmax_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Softmax_grad/sub" + op: "Sub" + input: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Reshape" + input: "gradients/bert/encoder/layer_1/attention/self/Softmax_grad/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Softmax_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_1/attention/self/Softmax_grad/sub" + input: "bert/encoder/layer_1/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/value/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_1/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\001\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_1/attention/self/add_grad/Shape" + input: "gradients/bert/encoder/layer_1/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_1/attention/self/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_1/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_1/attention/self/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/self/add_grad/Sum" + input: "gradients/bert/encoder/layer_1/attention/self/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_1/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_1/attention/self/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/self/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_1/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/value/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_1/attention/self/Reshape_2_grad/Reshape" + input: "bert/encoder/layer_1/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/value/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_1/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { 
+ type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Shape" + input: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_1/attention/self/add_grad/Reshape" + input: "bert/encoder/layer_1/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Mul" + input: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Sum" + input: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_1/attention/self/MatMul" + input: "gradients/bert/encoder/layer_1/attention/self/add_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: 
false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/MatMul_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_1/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/MatMul_grad/MatMul_1" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_1/attention/self/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_1/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_1/attention/self/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_1/attention/self/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_1/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_1/attention/self/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/layer_1/attention/self/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + 
value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/self/transpose_grad/transpose" + input: "gradients/bert/encoder/layer_1/attention/self/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/layer_1/attention/self/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/query/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_1/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/key/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_1/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/query/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_1/attention/self/Reshape_grad/Reshape" + input: "bert/encoder/layer_1/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/query/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_1/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + 
attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/key/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_1/attention/self/Reshape_1_grad/Reshape" + input: "bert/encoder/layer_1/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/key/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_1/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_76" + op: "AddN" + input: "gradients/AddN_75" + input: "gradients/bert/encoder/layer_1/attention/self/value/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_1/attention/self/query/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_1/attention/self/key/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 4 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_76" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_76" + input: "bert/encoder/layer_0/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_76" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_76" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + 
} + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_0/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: 
"gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_77" + op: "AddN" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_77" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" 
+ value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_77" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: 
"gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_0/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape_1" + input: 
"gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/truediv" + 
attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_0/output/add" + input: "bert/encoder/layer_0/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_0/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 
1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_78" + op: "AddN" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/dropout/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_78" + input: "bert/encoder/layer_0/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/dropout/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_78" + input: "bert/encoder/layer_0/output/dropout/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Shape" + input: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_0/output/dropout/mul_1_grad/Mul" + input: "bert/encoder/layer_0/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Sum" + input: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim 
{ + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_0/output/dense/BiasAdd" + input: "gradients/bert/encoder/layer_0/output/dropout/mul_1_grad/Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Reshape" + input: "bert/encoder/layer_0/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_0/intermediate/dense/mul_3" + input: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_3_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_0/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_0/intermediate/dense/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_3_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_0/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_0/intermediate/dense/BiasAdd" + attr { + key: "T" + value { + type: 
DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_3_grad/Mul_1" + input: "bert/encoder/layer_0/intermediate/dense/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_0/intermediate/dense/mul_2/x" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_3_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + 
} + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/add_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/add_1_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/add_1_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_0/intermediate/dense/add_1_grad/Shape" + input: "gradients/bert/encoder/layer_0/intermediate/dense/add_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/add_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/Reshape_1" + input: "gradients/bert/encoder/layer_0/intermediate/dense/add_1_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/add_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/intermediate/dense/add_1_grad/Sum" + input: "gradients/bert/encoder/layer_0/intermediate/dense/add_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/add_1_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_2_grad/Reshape_1" + input: "gradients/bert/encoder/layer_0/intermediate/dense/add_1_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" 
+ value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/add_1_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/intermediate/dense/add_1_grad/Sum_1" + input: "gradients/bert/encoder/layer_0/intermediate/dense/add_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/Tanh_grad/TanhGrad" + op: "TanhGrad" + input: "bert/encoder/layer_0/intermediate/dense/Tanh" + input: "gradients/bert/encoder/layer_0/intermediate/dense/add_1_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Shape" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_0/intermediate/dense/Tanh_grad/TanhGrad" + input: "bert/encoder/layer_0/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Sum" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + 
attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_0/intermediate/dense/mul_1/x" + input: "gradients/bert/encoder/layer_0/intermediate/dense/Tanh_grad/TanhGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Sum_1" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Shape" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Reshape_1" + input: "bert/encoder/layer_0/intermediate/dense/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Mul" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { 
+ type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Sum" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_0/intermediate/dense/mul/x" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Shape" + input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/mul" + op: "Mul" + input: 
"gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Reshape_1" + input: "bert/encoder/layer_0/intermediate/dense/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/sub/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/sub" + op: "Sub" + input: "bert/encoder/layer_0/intermediate/dense/Pow/y" + input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/sub/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Pow" + op: "Pow" + input: "bert/encoder/layer_0/intermediate/dense/BiasAdd" + input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/mul" + input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/mul_1" + input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Sum" + input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Greater/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Greater" + op: "Greater" + input: "bert/encoder/layer_0/intermediate/dense/BiasAdd" + input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Greater/y" + attr { + key: "T" + value { + 
type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/ones_like/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/ones_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/ones_like" + op: "Fill" + input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/ones_like/Shape" + input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/ones_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Select" + op: "Select" + input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Greater" + input: "bert/encoder/layer_0/intermediate/dense/BiasAdd" + input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/ones_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Log" + op: "Log" + input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/zeros_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/zeros_like" + op: "Fill" + input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/zeros_like/shape_as_tensor" + input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/zeros_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim 
{ + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Select_1" + op: "Select" + input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Greater" + input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Log" + input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/zeros_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/mul_2" + op: "Mul" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Reshape_1" + input: "bert/encoder/layer_0/intermediate/dense/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/mul_3" + op: "Mul" + input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/mul_2" + input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Select_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/mul_3" + input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Sum_1" + input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/AddN_79" + op: "AddN" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_3_grad/Mul" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Reshape_1" + input: "gradients/bert/encoder/layer_0/intermediate/dense/Pow_grad/Reshape" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/intermediate/dense/mul_3_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/AddN_79" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: 
"gradients/bert/encoder/layer_0/intermediate/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/AddN_79" + input: "bert/encoder/layer_0/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_1" + input: "gradients/AddN_79" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_80" + op: "AddN" + input: "gradients/AddN_78" + input: "gradients/bert/encoder/layer_0/intermediate/dense/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_80" + input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_80" + input: "bert/encoder/layer_0/attention/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + 
attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_80" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_80" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: 
"\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_0/attention/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: 
"gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_81" + op: "AddN" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_81" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + 
input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_81" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } 
+ } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_0/attention/output/LayerNorm/moments/variance/reduction_indices" + 
input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + 
s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + 
} + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_0/attention/output/add" + input: "bert/encoder/layer_0/attention/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + 
op: "Sum" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_0/attention/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + 
key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Reshape" + input: 
"gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_82" + op: "AddN" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_82" + input: "bert/encoder/layer_0/attention/output/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_82" + input: "bert/encoder/layer_0/attention/output/dropout/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: 
"gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Shape" + input: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_1_grad/Mul" + input: "bert/encoder/layer_0/attention/output/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Sum" + input: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_0/attention/output/dense/BiasAdd" + input: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_1_grad/Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/dense/BiasAdd_grad/BiasAddGrad" + op: 
"BiasAddGrad" + input: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Reshape" + input: "bert/encoder/layer_0/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_0/attention/self/Reshape_3" + input: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Reshape_3_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Reshape_3_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/output/dense/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_0/attention/self/Reshape_3_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/transpose_3_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_0/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/transpose_3_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_0/attention/self/Reshape_3_grad/Reshape" + input: "gradients/bert/encoder/layer_0/attention/self/transpose_3_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/layer_0/attention/self/transpose_3_grad/transpose" + input: 
"bert/encoder/layer_0/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/layer_0/attention/self/dropout/mul_1" + input: "gradients/bert/encoder/layer_0/attention/self/transpose_3_grad/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/strided_slice/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/strided_slice/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/strided_slice/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/strided_slice" + op: "StridedSlice" + input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/Shape" + input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/strided_slice/stack" + input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/strided_slice/stack_1" + input: 
"gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/strided_slice/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 1 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/strided_slice_1/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/strided_slice_1/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/strided_slice_1/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/strided_slice_1" + op: "StridedSlice" + input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/Shape_1" + input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/strided_slice_1/stack" + input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/strided_slice_1/stack_1" + input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/strided_slice_1/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 1 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/strided_slice" + input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/strided_slice_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/MatMul" + input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/BroadcastGradientArgs" + attr { + 
key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/Sum" + input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/MatMul_1" + input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/Sum_1" + input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/Reshape" + input: "bert/encoder/layer_0/attention/self/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/Reshape" + input: "bert/encoder/layer_0/attention/self/dropout/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/transpose_2_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_0/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/transpose_2_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/Reshape_1" + input: 
"gradients/bert/encoder/layer_0/attention/self/transpose_2_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Shape" + input: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_1_grad/Mul" + input: "bert/encoder/layer_0/attention/self/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Sum" + input: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_0/attention/self/Softmax" + input: 
"gradients/bert/encoder/layer_0/attention/self/dropout/mul_1_grad/Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/layer_0/attention/self/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Softmax_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Reshape" + input: "bert/encoder/layer_0/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Softmax_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Softmax_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_0/attention/self/Softmax_grad/mul" + input: "gradients/bert/encoder/layer_0/attention/self/Softmax_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 
128 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Softmax_grad/sub" + op: "Sub" + input: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Reshape" + input: "gradients/bert/encoder/layer_0/attention/self/Softmax_grad/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Softmax_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_0/attention/self/Softmax_grad/sub" + input: "bert/encoder/layer_0/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/value/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_0/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\001\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_0/attention/self/add_grad/Shape" + input: "gradients/bert/encoder/layer_0/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_0/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_0/attention/self/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/add_grad/Reshape" + op: "Reshape" + input: 
"gradients/bert/encoder/layer_0/attention/self/add_grad/Sum" + input: "gradients/bert/encoder/layer_0/attention/self/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_0/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_0/attention/self/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/self/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_0/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/value/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_0/attention/self/Reshape_2_grad/Reshape" + input: "bert/encoder/layer_0/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/value/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/Reshape_1" + input: "gradients/bert/encoder/layer_0/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: 
"gradients/bert/encoder/layer_0/attention/self/Mul_grad/Shape" + input: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_0/attention/self/add_grad/Reshape" + input: "bert/encoder/layer_0/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Mul" + input: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Sum" + input: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_0/attention/self/MatMul" + input: "gradients/bert/encoder/layer_0/attention/self/add_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/MatMul_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_0/attention/self/transpose_1" + attr { + key: "T" + 
value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/MatMul_grad/MatMul_1" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_0/attention/self/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_0/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_0/attention/self/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_0/attention/self/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_0/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_0/attention/self/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/layer_0/attention/self/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/self/transpose_grad/transpose" + input: "gradients/bert/encoder/layer_0/attention/self/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + 
list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/layer_0/attention/self/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/query/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_0/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/key/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_0/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/query/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_0/attention/self/Reshape_grad/Reshape" + input: "bert/encoder/layer_0/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/query/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/Reshape_1" + input: "gradients/bert/encoder/layer_0/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/key/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_0/attention/self/Reshape_1_grad/Reshape" + input: "bert/encoder/layer_0/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: 
"gradients/bert/encoder/layer_0/attention/self/key/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/Reshape_1" + input: "gradients/bert/encoder/layer_0/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_83" + op: "AddN" + input: "gradients/AddN_82" + input: "gradients/bert/encoder/layer_0/attention/self/value/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_0/attention/self/query/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_0/attention/self/key/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 4 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_83" + input: "gradients/bert/encoder/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/dropout/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/Reshape_1_grad/Reshape" + input: "bert/embeddings/dropout/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/dropout/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/Reshape_1_grad/Reshape" + input: "bert/embeddings/dropout/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/dropout/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/dropout/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + 
tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/dropout/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/embeddings/dropout/mul_grad/Shape" + input: "gradients/bert/embeddings/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/embeddings/dropout/mul_1_grad/Mul" + input: "bert/embeddings/dropout/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/dropout/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/embeddings/dropout/mul_grad/Mul" + input: "gradients/bert/embeddings/dropout/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/embeddings/dropout/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/embeddings/dropout/mul_grad/Sum" + input: "gradients/bert/embeddings/dropout/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/dropout/mul_grad/Mul_1" + op: "Mul" + input: "bert/embeddings/LayerNorm/batchnorm/add_1" + input: "gradients/bert/embeddings/dropout/mul_1_grad/Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/dropout/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/embeddings/dropout/mul_grad/Mul_1" + input: "gradients/bert/embeddings/dropout/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/embeddings/dropout/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/embeddings/dropout/mul_grad/Sum_1" + input: "gradients/bert/embeddings/dropout/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/embeddings/dropout/mul_grad/Reshape" + input: "bert/embeddings/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} 
+node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/bert/embeddings/dropout/mul_grad/Reshape" + input: "bert/embeddings/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/bert/embeddings/dropout/mul_grad/Reshape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/bert/embeddings/dropout/mul_grad/Reshape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } 
+ dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/embeddings/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/embeddings/LayerNorm/moments/mean" + input: 
"gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_84" + op: "AddN" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_84" + input: "bert/embeddings/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 
+ } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/embeddings/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_84" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/embeddings/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + 
attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 3 + } + } 
+ } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/embeddings/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + 
attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } 
+} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/embeddings/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { 
+ dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/embeddings/add_1" + input: "bert/embeddings/LayerNorm/moments/StopGradient" + input: "^gradients/bert/embeddings/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: 
DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 3 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/embeddings/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + 
} + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: 
"gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_85" + op: "AddN" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/add_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/add_1_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } 
+ attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/add_1_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/embeddings/add_1_grad/Shape" + input: "gradients/bert/embeddings/add_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/add_1_grad/Sum" + op: "Sum" + input: "gradients/AddN_85" + input: "gradients/bert/embeddings/add_1_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/embeddings/add_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/embeddings/add_1_grad/Sum" + input: "gradients/bert/embeddings/add_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/add_1_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_85" + input: "gradients/bert/embeddings/add_1_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/embeddings/add_1_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/embeddings/add_1_grad/Sum_1" + input: "gradients/bert/embeddings/add_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/Reshape_4_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/Reshape_4_grad/Reshape" + op: "Reshape" + input: "gradients/bert/embeddings/add_1_grad/Reshape_1" + input: "gradients/bert/embeddings/Reshape_4_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: 
DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/embeddings/add_1_grad/Reshape" + input: "gradients/bert/embeddings/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/Reshape_3_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/Reshape_3_grad/Reshape" + op: "Reshape" + input: "gradients/bert/embeddings/add_1_grad/Reshape" + input: "gradients/bert/embeddings/Reshape_3_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/Slice_grad/Rank" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/embeddings/Slice_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/Slice_grad/stack/1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/embeddings/Slice_grad/stack" + op: "Pack" + input: "gradients/bert/embeddings/Slice_grad/Rank" + input: "gradients/bert/embeddings/Slice_grad/stack/1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "axis" + value { + i: 0 + } + } +} +node { + name: "gradients/bert/embeddings/Slice_grad/Reshape" + op: "Reshape" + input: "bert/embeddings/Slice/begin" + input: "gradients/bert/embeddings/Slice_grad/stack" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/Slice_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } 
+ } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\002\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/Slice_grad/sub" + op: "Sub" + input: "gradients/bert/embeddings/Slice_grad/Shape_1" + input: "gradients/bert/embeddings/Slice_grad/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/Slice_grad/sub_1" + op: "Sub" + input: "gradients/bert/embeddings/Slice_grad/sub" + input: "bert/embeddings/Slice/begin" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/Slice_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/embeddings/Slice_grad/sub_1" + input: "gradients/bert/embeddings/Slice_grad/stack" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/Slice_grad/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/embeddings/Slice_grad/concat" + op: "ConcatV2" + input: "gradients/bert/embeddings/Slice_grad/Reshape" + input: "gradients/bert/embeddings/Slice_grad/Reshape_1" + input: "gradients/bert/embeddings/Slice_grad/concat/axis" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/Slice_grad/Pad" + op: "Pad" + input: "gradients/bert/embeddings/Reshape_4_grad/Reshape" + input: "gradients/bert/embeddings/Slice_grad/concat" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tpaddings" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/GatherV2_grad/Shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\210R\000\000\000\000\000\000\000\003\000\000\000\000\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/GatherV2_grad/Cast" + op: "Cast" + input: "gradients/bert/embeddings/GatherV2_grad/Shape" + attr { + key: "DstT" + value { + type: DT_INT32 + } + } + attr { + key: "SrcT" + value { + type: DT_INT64 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/GatherV2_grad/Size" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 4096 + } + } + } +} +node { + name: "gradients/bert/embeddings/GatherV2_grad/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/embeddings/GatherV2_grad/ExpandDims" + op: "ExpandDims" + input: "gradients/bert/embeddings/GatherV2_grad/Size" + input: "gradients/bert/embeddings/GatherV2_grad/ExpandDims/dim" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tdim" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/GatherV2_grad/strided_slice/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/embeddings/GatherV2_grad/strided_slice/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/embeddings/GatherV2_grad/strided_slice/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/embeddings/GatherV2_grad/strided_slice" + op: "StridedSlice" + input: "gradients/bert/embeddings/GatherV2_grad/Cast" + input: "gradients/bert/embeddings/GatherV2_grad/strided_slice/stack" + input: "gradients/bert/embeddings/GatherV2_grad/strided_slice/stack_1" + input: "gradients/bert/embeddings/GatherV2_grad/strided_slice/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 1 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "gradients/bert/embeddings/GatherV2_grad/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + 
tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/embeddings/GatherV2_grad/concat" + op: "ConcatV2" + input: "gradients/bert/embeddings/GatherV2_grad/ExpandDims" + input: "gradients/bert/embeddings/GatherV2_grad/strided_slice" + input: "gradients/bert/embeddings/GatherV2_grad/concat/axis" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/GatherV2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/embeddings/Reshape_1_grad/Reshape" + input: "gradients/bert/embeddings/GatherV2_grad/concat" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/GatherV2_grad/Reshape_1" + op: "Reshape" + input: "bert/embeddings/Reshape" + input: "gradients/bert/embeddings/GatherV2_grad/ExpandDims" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/embeddings/Reshape_3_grad/Reshape" + input: "bert/embeddings/token_type_embeddings/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 2 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/embeddings/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/embeddings/one_hot" + input: "gradients/bert/embeddings/Reshape_3_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "global_norm/L2Loss" + op: "L2Loss" + input: "gradients/bert/embeddings/GatherV2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/GatherV2_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_1" + op: "L2Loss" + input: "gradients/bert/embeddings/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_2" + op: "L2Loss" + input: "gradients/bert/embeddings/Slice_grad/Pad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/Slice_grad/Pad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_3" + op: "L2Loss" + input: 
"gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_4" + op: "L2Loss" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_5" + op: "L2Loss" + input: "gradients/bert/encoder/layer_0/attention/self/query/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_6" + op: "L2Loss" + input: "gradients/bert/encoder/layer_0/attention/self/query/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_7" + op: "L2Loss" + input: "gradients/bert/encoder/layer_0/attention/self/key/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_8" + op: "L2Loss" + input: "gradients/bert/encoder/layer_0/attention/self/key/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_9" + op: "L2Loss" + input: "gradients/bert/encoder/layer_0/attention/self/value/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_10" + op: "L2Loss" + input: "gradients/bert/encoder/layer_0/attention/self/value/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_11" + op: "L2Loss" + input: "gradients/bert/encoder/layer_0/attention/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@gradients/bert/encoder/layer_0/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_12" + op: "L2Loss" + input: "gradients/bert/encoder/layer_0/attention/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_13" + op: "L2Loss" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_14" + op: "L2Loss" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_15" + op: "L2Loss" + input: "gradients/bert/encoder/layer_0/intermediate/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_16" + op: "L2Loss" + input: "gradients/bert/encoder/layer_0/intermediate/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_17" + op: "L2Loss" + input: "gradients/bert/encoder/layer_0/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_18" + op: "L2Loss" + input: "gradients/bert/encoder/layer_0/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_19" + op: "L2Loss" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { 
+ name: "global_norm/L2Loss_20" + op: "L2Loss" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_21" + op: "L2Loss" + input: "gradients/bert/encoder/layer_1/attention/self/query/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_22" + op: "L2Loss" + input: "gradients/bert/encoder/layer_1/attention/self/query/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_23" + op: "L2Loss" + input: "gradients/bert/encoder/layer_1/attention/self/key/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_24" + op: "L2Loss" + input: "gradients/bert/encoder/layer_1/attention/self/key/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_25" + op: "L2Loss" + input: "gradients/bert/encoder/layer_1/attention/self/value/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_26" + op: "L2Loss" + input: "gradients/bert/encoder/layer_1/attention/self/value/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_27" + op: "L2Loss" + input: "gradients/bert/encoder/layer_1/attention/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_28" + op: "L2Loss" + input: "gradients/bert/encoder/layer_1/attention/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr 
{ + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_29" + op: "L2Loss" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_30" + op: "L2Loss" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_31" + op: "L2Loss" + input: "gradients/bert/encoder/layer_1/intermediate/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_32" + op: "L2Loss" + input: "gradients/bert/encoder/layer_1/intermediate/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_33" + op: "L2Loss" + input: "gradients/bert/encoder/layer_1/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_34" + op: "L2Loss" + input: "gradients/bert/encoder/layer_1/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_35" + op: "L2Loss" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_36" + op: "L2Loss" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + 
list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_37" + op: "L2Loss" + input: "gradients/bert/encoder/layer_2/attention/self/query/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_38" + op: "L2Loss" + input: "gradients/bert/encoder/layer_2/attention/self/query/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_39" + op: "L2Loss" + input: "gradients/bert/encoder/layer_2/attention/self/key/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_40" + op: "L2Loss" + input: "gradients/bert/encoder/layer_2/attention/self/key/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_41" + op: "L2Loss" + input: "gradients/bert/encoder/layer_2/attention/self/value/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_42" + op: "L2Loss" + input: "gradients/bert/encoder/layer_2/attention/self/value/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_43" + op: "L2Loss" + input: "gradients/bert/encoder/layer_2/attention/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_44" + op: "L2Loss" + input: "gradients/bert/encoder/layer_2/attention/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_45" + op: "L2Loss" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr 
{ + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_46" + op: "L2Loss" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_47" + op: "L2Loss" + input: "gradients/bert/encoder/layer_2/intermediate/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_48" + op: "L2Loss" + input: "gradients/bert/encoder/layer_2/intermediate/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_49" + op: "L2Loss" + input: "gradients/bert/encoder/layer_2/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_50" + op: "L2Loss" + input: "gradients/bert/encoder/layer_2/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_51" + op: "L2Loss" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_52" + op: "L2Loss" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_53" + op: "L2Loss" + input: "gradients/bert/encoder/layer_3/attention/self/query/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { 
+ key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_54" + op: "L2Loss" + input: "gradients/bert/encoder/layer_3/attention/self/query/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_55" + op: "L2Loss" + input: "gradients/bert/encoder/layer_3/attention/self/key/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_56" + op: "L2Loss" + input: "gradients/bert/encoder/layer_3/attention/self/key/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_57" + op: "L2Loss" + input: "gradients/bert/encoder/layer_3/attention/self/value/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_58" + op: "L2Loss" + input: "gradients/bert/encoder/layer_3/attention/self/value/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_59" + op: "L2Loss" + input: "gradients/bert/encoder/layer_3/attention/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_60" + op: "L2Loss" + input: "gradients/bert/encoder/layer_3/attention/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_61" + op: "L2Loss" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_62" + op: "L2Loss" + input: 
"gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_63" + op: "L2Loss" + input: "gradients/bert/encoder/layer_3/intermediate/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_64" + op: "L2Loss" + input: "gradients/bert/encoder/layer_3/intermediate/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_65" + op: "L2Loss" + input: "gradients/bert/encoder/layer_3/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_66" + op: "L2Loss" + input: "gradients/bert/encoder/layer_3/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_67" + op: "L2Loss" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_68" + op: "L2Loss" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_69" + op: "L2Loss" + input: "gradients/bert/encoder/layer_4/attention/self/query/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_70" + op: "L2Loss" + input: "gradients/bert/encoder/layer_4/attention/self/query/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@gradients/bert/encoder/layer_4/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_71" + op: "L2Loss" + input: "gradients/bert/encoder/layer_4/attention/self/key/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_72" + op: "L2Loss" + input: "gradients/bert/encoder/layer_4/attention/self/key/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_73" + op: "L2Loss" + input: "gradients/bert/encoder/layer_4/attention/self/value/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_74" + op: "L2Loss" + input: "gradients/bert/encoder/layer_4/attention/self/value/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_75" + op: "L2Loss" + input: "gradients/bert/encoder/layer_4/attention/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_76" + op: "L2Loss" + input: "gradients/bert/encoder/layer_4/attention/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_77" + op: "L2Loss" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_78" + op: "L2Loss" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + 
shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_79" + op: "L2Loss" + input: "gradients/bert/encoder/layer_4/intermediate/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_80" + op: "L2Loss" + input: "gradients/bert/encoder/layer_4/intermediate/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_81" + op: "L2Loss" + input: "gradients/bert/encoder/layer_4/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_82" + op: "L2Loss" + input: "gradients/bert/encoder/layer_4/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_83" + op: "L2Loss" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_84" + op: "L2Loss" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_85" + op: "L2Loss" + input: "gradients/bert/encoder/layer_5/attention/self/query/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_86" + op: "L2Loss" + input: "gradients/bert/encoder/layer_5/attention/self/query/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_87" + op: "L2Loss" + input: "gradients/bert/encoder/layer_5/attention/self/key/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + 
key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_88" + op: "L2Loss" + input: "gradients/bert/encoder/layer_5/attention/self/key/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_89" + op: "L2Loss" + input: "gradients/bert/encoder/layer_5/attention/self/value/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_90" + op: "L2Loss" + input: "gradients/bert/encoder/layer_5/attention/self/value/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_91" + op: "L2Loss" + input: "gradients/bert/encoder/layer_5/attention/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_92" + op: "L2Loss" + input: "gradients/bert/encoder/layer_5/attention/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_93" + op: "L2Loss" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_94" + op: "L2Loss" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_95" + op: "L2Loss" + input: "gradients/bert/encoder/layer_5/intermediate/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_96" + op: "L2Loss" + input: "gradients/bert/encoder/layer_5/intermediate/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_97" + op: "L2Loss" + input: "gradients/bert/encoder/layer_5/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_98" + op: "L2Loss" + input: "gradients/bert/encoder/layer_5/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_99" + op: "L2Loss" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_100" + op: "L2Loss" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_101" + op: "L2Loss" + input: "gradients/bert/encoder/layer_6/attention/self/query/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_102" + op: "L2Loss" + input: "gradients/bert/encoder/layer_6/attention/self/query/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_103" + op: "L2Loss" + input: "gradients/bert/encoder/layer_6/attention/self/key/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_104" + op: "L2Loss" + input: "gradients/bert/encoder/layer_6/attention/self/key/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" 
+ value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_105" + op: "L2Loss" + input: "gradients/bert/encoder/layer_6/attention/self/value/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_106" + op: "L2Loss" + input: "gradients/bert/encoder/layer_6/attention/self/value/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_107" + op: "L2Loss" + input: "gradients/bert/encoder/layer_6/attention/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_108" + op: "L2Loss" + input: "gradients/bert/encoder/layer_6/attention/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_109" + op: "L2Loss" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_110" + op: "L2Loss" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_111" + op: "L2Loss" + input: "gradients/bert/encoder/layer_6/intermediate/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_112" + op: "L2Loss" + input: "gradients/bert/encoder/layer_6/intermediate/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@gradients/bert/encoder/layer_6/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_113" + op: "L2Loss" + input: "gradients/bert/encoder/layer_6/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_114" + op: "L2Loss" + input: "gradients/bert/encoder/layer_6/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_115" + op: "L2Loss" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_116" + op: "L2Loss" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_117" + op: "L2Loss" + input: "gradients/bert/encoder/layer_7/attention/self/query/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_118" + op: "L2Loss" + input: "gradients/bert/encoder/layer_7/attention/self/query/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_119" + op: "L2Loss" + input: "gradients/bert/encoder/layer_7/attention/self/key/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_120" + op: "L2Loss" + input: "gradients/bert/encoder/layer_7/attention/self/key/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_121" + op: 
"L2Loss" + input: "gradients/bert/encoder/layer_7/attention/self/value/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_122" + op: "L2Loss" + input: "gradients/bert/encoder/layer_7/attention/self/value/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_123" + op: "L2Loss" + input: "gradients/bert/encoder/layer_7/attention/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_124" + op: "L2Loss" + input: "gradients/bert/encoder/layer_7/attention/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_125" + op: "L2Loss" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_126" + op: "L2Loss" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_127" + op: "L2Loss" + input: "gradients/bert/encoder/layer_7/intermediate/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_128" + op: "L2Loss" + input: "gradients/bert/encoder/layer_7/intermediate/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_129" + op: "L2Loss" + input: "gradients/bert/encoder/layer_7/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + 
attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_130" + op: "L2Loss" + input: "gradients/bert/encoder/layer_7/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_131" + op: "L2Loss" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_132" + op: "L2Loss" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_133" + op: "L2Loss" + input: "gradients/bert/encoder/layer_8/attention/self/query/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_134" + op: "L2Loss" + input: "gradients/bert/encoder/layer_8/attention/self/query/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_135" + op: "L2Loss" + input: "gradients/bert/encoder/layer_8/attention/self/key/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_136" + op: "L2Loss" + input: "gradients/bert/encoder/layer_8/attention/self/key/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_137" + op: "L2Loss" + input: "gradients/bert/encoder/layer_8/attention/self/value/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + 
} +} +node { + name: "global_norm/L2Loss_138" + op: "L2Loss" + input: "gradients/bert/encoder/layer_8/attention/self/value/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_139" + op: "L2Loss" + input: "gradients/bert/encoder/layer_8/attention/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_140" + op: "L2Loss" + input: "gradients/bert/encoder/layer_8/attention/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_141" + op: "L2Loss" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_142" + op: "L2Loss" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_143" + op: "L2Loss" + input: "gradients/bert/encoder/layer_8/intermediate/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_144" + op: "L2Loss" + input: "gradients/bert/encoder/layer_8/intermediate/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_145" + op: "L2Loss" + input: "gradients/bert/encoder/layer_8/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_146" + op: "L2Loss" + input: "gradients/bert/encoder/layer_8/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: 
"T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_147" + op: "L2Loss" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_148" + op: "L2Loss" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_149" + op: "L2Loss" + input: "gradients/bert/encoder/layer_9/attention/self/query/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_150" + op: "L2Loss" + input: "gradients/bert/encoder/layer_9/attention/self/query/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_151" + op: "L2Loss" + input: "gradients/bert/encoder/layer_9/attention/self/key/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_152" + op: "L2Loss" + input: "gradients/bert/encoder/layer_9/attention/self/key/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_153" + op: "L2Loss" + input: "gradients/bert/encoder/layer_9/attention/self/value/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_154" + op: "L2Loss" + input: "gradients/bert/encoder/layer_9/attention/self/value/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + 
key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_155" + op: "L2Loss" + input: "gradients/bert/encoder/layer_9/attention/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_156" + op: "L2Loss" + input: "gradients/bert/encoder/layer_9/attention/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_157" + op: "L2Loss" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_158" + op: "L2Loss" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_159" + op: "L2Loss" + input: "gradients/bert/encoder/layer_9/intermediate/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_160" + op: "L2Loss" + input: "gradients/bert/encoder/layer_9/intermediate/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_161" + op: "L2Loss" + input: "gradients/bert/encoder/layer_9/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_162" + op: "L2Loss" + input: "gradients/bert/encoder/layer_9/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_163" + op: "L2Loss" + input: 
"gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_164" + op: "L2Loss" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_165" + op: "L2Loss" + input: "gradients/bert/encoder/layer_10/attention/self/query/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_166" + op: "L2Loss" + input: "gradients/bert/encoder/layer_10/attention/self/query/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_167" + op: "L2Loss" + input: "gradients/bert/encoder/layer_10/attention/self/key/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_168" + op: "L2Loss" + input: "gradients/bert/encoder/layer_10/attention/self/key/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_169" + op: "L2Loss" + input: "gradients/bert/encoder/layer_10/attention/self/value/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_170" + op: "L2Loss" + input: "gradients/bert/encoder/layer_10/attention/self/value/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_171" + op: "L2Loss" + input: "gradients/bert/encoder/layer_10/attention/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + 
s: "loc:@gradients/bert/encoder/layer_10/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_172" + op: "L2Loss" + input: "gradients/bert/encoder/layer_10/attention/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_173" + op: "L2Loss" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_174" + op: "L2Loss" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_175" + op: "L2Loss" + input: "gradients/bert/encoder/layer_10/intermediate/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_176" + op: "L2Loss" + input: "gradients/bert/encoder/layer_10/intermediate/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_177" + op: "L2Loss" + input: "gradients/bert/encoder/layer_10/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_178" + op: "L2Loss" + input: "gradients/bert/encoder/layer_10/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_179" + op: "L2Loss" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape 
{ + } + } + } + } +} +node { + name: "global_norm/L2Loss_180" + op: "L2Loss" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_181" + op: "L2Loss" + input: "gradients/bert/encoder/layer_11/attention/self/query/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_182" + op: "L2Loss" + input: "gradients/bert/encoder/layer_11/attention/self/query/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_183" + op: "L2Loss" + input: "gradients/bert/encoder/layer_11/attention/self/key/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_184" + op: "L2Loss" + input: "gradients/bert/encoder/layer_11/attention/self/key/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_185" + op: "L2Loss" + input: "gradients/bert/encoder/layer_11/attention/self/value/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_186" + op: "L2Loss" + input: "gradients/bert/encoder/layer_11/attention/self/value/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_187" + op: "L2Loss" + input: "gradients/bert/encoder/layer_11/attention/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_188" + op: "L2Loss" + input: "gradients/bert/encoder/layer_11/attention/output/dense/BiasAdd_grad/BiasAddGrad" + attr 
{ + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_189" + op: "L2Loss" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_190" + op: "L2Loss" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_191" + op: "L2Loss" + input: "gradients/bert/encoder/layer_11/intermediate/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_192" + op: "L2Loss" + input: "gradients/bert/encoder/layer_11/intermediate/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_193" + op: "L2Loss" + input: "gradients/bert/encoder/layer_11/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_194" + op: "L2Loss" + input: "gradients/bert/encoder/layer_11/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_195" + op: "L2Loss" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_196" + op: "L2Loss" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_197" + op: "L2Loss" + input: "gradients/bert/pooler/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/pooler/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_198" + op: "L2Loss" + input: "gradients/bert/pooler/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/pooler/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_199" + op: "L2Loss" + input: "gradients/loss/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/loss/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_200" + op: "L2Loss" + input: "gradients/loss/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/loss/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/stack" + op: "Pack" + input: "global_norm/L2Loss" + input: "global_norm/L2Loss_1" + input: "global_norm/L2Loss_2" + input: "global_norm/L2Loss_3" + input: "global_norm/L2Loss_4" + input: "global_norm/L2Loss_5" + input: "global_norm/L2Loss_6" + input: "global_norm/L2Loss_7" + input: "global_norm/L2Loss_8" + input: "global_norm/L2Loss_9" + input: "global_norm/L2Loss_10" + input: "global_norm/L2Loss_11" + input: "global_norm/L2Loss_12" + input: "global_norm/L2Loss_13" + input: "global_norm/L2Loss_14" + input: "global_norm/L2Loss_15" + input: "global_norm/L2Loss_16" + input: "global_norm/L2Loss_17" + input: "global_norm/L2Loss_18" + input: "global_norm/L2Loss_19" + input: "global_norm/L2Loss_20" + input: "global_norm/L2Loss_21" + input: "global_norm/L2Loss_22" + input: "global_norm/L2Loss_23" + input: "global_norm/L2Loss_24" + input: "global_norm/L2Loss_25" + input: "global_norm/L2Loss_26" + input: "global_norm/L2Loss_27" + input: "global_norm/L2Loss_28" + input: "global_norm/L2Loss_29" + input: "global_norm/L2Loss_30" + input: "global_norm/L2Loss_31" + input: "global_norm/L2Loss_32" + input: "global_norm/L2Loss_33" + input: "global_norm/L2Loss_34" + input: "global_norm/L2Loss_35" + input: "global_norm/L2Loss_36" + input: "global_norm/L2Loss_37" + input: "global_norm/L2Loss_38" + input: "global_norm/L2Loss_39" + input: "global_norm/L2Loss_40" + input: "global_norm/L2Loss_41" + input: "global_norm/L2Loss_42" + input: "global_norm/L2Loss_43" + input: "global_norm/L2Loss_44" + input: "global_norm/L2Loss_45" + input: "global_norm/L2Loss_46" + input: "global_norm/L2Loss_47" + input: "global_norm/L2Loss_48" + input: "global_norm/L2Loss_49" + input: "global_norm/L2Loss_50" + input: "global_norm/L2Loss_51" + input: "global_norm/L2Loss_52" + input: "global_norm/L2Loss_53" + input: "global_norm/L2Loss_54" + input: "global_norm/L2Loss_55" + input: "global_norm/L2Loss_56" + input: 
"global_norm/L2Loss_57" + input: "global_norm/L2Loss_58" + input: "global_norm/L2Loss_59" + input: "global_norm/L2Loss_60" + input: "global_norm/L2Loss_61" + input: "global_norm/L2Loss_62" + input: "global_norm/L2Loss_63" + input: "global_norm/L2Loss_64" + input: "global_norm/L2Loss_65" + input: "global_norm/L2Loss_66" + input: "global_norm/L2Loss_67" + input: "global_norm/L2Loss_68" + input: "global_norm/L2Loss_69" + input: "global_norm/L2Loss_70" + input: "global_norm/L2Loss_71" + input: "global_norm/L2Loss_72" + input: "global_norm/L2Loss_73" + input: "global_norm/L2Loss_74" + input: "global_norm/L2Loss_75" + input: "global_norm/L2Loss_76" + input: "global_norm/L2Loss_77" + input: "global_norm/L2Loss_78" + input: "global_norm/L2Loss_79" + input: "global_norm/L2Loss_80" + input: "global_norm/L2Loss_81" + input: "global_norm/L2Loss_82" + input: "global_norm/L2Loss_83" + input: "global_norm/L2Loss_84" + input: "global_norm/L2Loss_85" + input: "global_norm/L2Loss_86" + input: "global_norm/L2Loss_87" + input: "global_norm/L2Loss_88" + input: "global_norm/L2Loss_89" + input: "global_norm/L2Loss_90" + input: "global_norm/L2Loss_91" + input: "global_norm/L2Loss_92" + input: "global_norm/L2Loss_93" + input: "global_norm/L2Loss_94" + input: "global_norm/L2Loss_95" + input: "global_norm/L2Loss_96" + input: "global_norm/L2Loss_97" + input: "global_norm/L2Loss_98" + input: "global_norm/L2Loss_99" + input: "global_norm/L2Loss_100" + input: "global_norm/L2Loss_101" + input: "global_norm/L2Loss_102" + input: "global_norm/L2Loss_103" + input: "global_norm/L2Loss_104" + input: "global_norm/L2Loss_105" + input: "global_norm/L2Loss_106" + input: "global_norm/L2Loss_107" + input: "global_norm/L2Loss_108" + input: "global_norm/L2Loss_109" + input: "global_norm/L2Loss_110" + input: "global_norm/L2Loss_111" + input: "global_norm/L2Loss_112" + input: "global_norm/L2Loss_113" + input: "global_norm/L2Loss_114" + input: "global_norm/L2Loss_115" + input: "global_norm/L2Loss_116" + input: "global_norm/L2Loss_117" + input: "global_norm/L2Loss_118" + input: "global_norm/L2Loss_119" + input: "global_norm/L2Loss_120" + input: "global_norm/L2Loss_121" + input: "global_norm/L2Loss_122" + input: "global_norm/L2Loss_123" + input: "global_norm/L2Loss_124" + input: "global_norm/L2Loss_125" + input: "global_norm/L2Loss_126" + input: "global_norm/L2Loss_127" + input: "global_norm/L2Loss_128" + input: "global_norm/L2Loss_129" + input: "global_norm/L2Loss_130" + input: "global_norm/L2Loss_131" + input: "global_norm/L2Loss_132" + input: "global_norm/L2Loss_133" + input: "global_norm/L2Loss_134" + input: "global_norm/L2Loss_135" + input: "global_norm/L2Loss_136" + input: "global_norm/L2Loss_137" + input: "global_norm/L2Loss_138" + input: "global_norm/L2Loss_139" + input: "global_norm/L2Loss_140" + input: "global_norm/L2Loss_141" + input: "global_norm/L2Loss_142" + input: "global_norm/L2Loss_143" + input: "global_norm/L2Loss_144" + input: "global_norm/L2Loss_145" + input: "global_norm/L2Loss_146" + input: "global_norm/L2Loss_147" + input: "global_norm/L2Loss_148" + input: "global_norm/L2Loss_149" + input: "global_norm/L2Loss_150" + input: "global_norm/L2Loss_151" + input: "global_norm/L2Loss_152" + input: "global_norm/L2Loss_153" + input: "global_norm/L2Loss_154" + input: "global_norm/L2Loss_155" + input: "global_norm/L2Loss_156" + input: "global_norm/L2Loss_157" + input: "global_norm/L2Loss_158" + input: "global_norm/L2Loss_159" + input: "global_norm/L2Loss_160" + input: "global_norm/L2Loss_161" + input: "global_norm/L2Loss_162" + 
input: "global_norm/L2Loss_163" + input: "global_norm/L2Loss_164" + input: "global_norm/L2Loss_165" + input: "global_norm/L2Loss_166" + input: "global_norm/L2Loss_167" + input: "global_norm/L2Loss_168" + input: "global_norm/L2Loss_169" + input: "global_norm/L2Loss_170" + input: "global_norm/L2Loss_171" + input: "global_norm/L2Loss_172" + input: "global_norm/L2Loss_173" + input: "global_norm/L2Loss_174" + input: "global_norm/L2Loss_175" + input: "global_norm/L2Loss_176" + input: "global_norm/L2Loss_177" + input: "global_norm/L2Loss_178" + input: "global_norm/L2Loss_179" + input: "global_norm/L2Loss_180" + input: "global_norm/L2Loss_181" + input: "global_norm/L2Loss_182" + input: "global_norm/L2Loss_183" + input: "global_norm/L2Loss_184" + input: "global_norm/L2Loss_185" + input: "global_norm/L2Loss_186" + input: "global_norm/L2Loss_187" + input: "global_norm/L2Loss_188" + input: "global_norm/L2Loss_189" + input: "global_norm/L2Loss_190" + input: "global_norm/L2Loss_191" + input: "global_norm/L2Loss_192" + input: "global_norm/L2Loss_193" + input: "global_norm/L2Loss_194" + input: "global_norm/L2Loss_195" + input: "global_norm/L2Loss_196" + input: "global_norm/L2Loss_197" + input: "global_norm/L2Loss_198" + input: "global_norm/L2Loss_199" + input: "global_norm/L2Loss_200" + attr { + key: "N" + value { + i: 201 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 201 + } + } + } + } + } + attr { + key: "axis" + value { + i: 0 + } + } +} +node { + name: "global_norm/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "global_norm/Sum" + op: "Sum" + input: "global_norm/stack" + input: "global_norm/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "global_norm/Const_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "global_norm/mul" + op: "Mul" + input: "global_norm/Sum" + input: "global_norm/Const_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/global_norm" + op: "Sqrt" + input: "global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "clip_by_global_norm/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "clip_by_global_norm/truediv" + op: "RealDiv" + input: "clip_by_global_norm/truediv/x" + input: "global_norm/global_norm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value 
{ + list { + shape { + } + } + } + } +} +node { + name: "clip_by_global_norm/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "clip_by_global_norm/truediv_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "clip_by_global_norm/truediv_1" + op: "RealDiv" + input: "clip_by_global_norm/Const" + input: "clip_by_global_norm/truediv_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "clip_by_global_norm/Minimum" + op: "Minimum" + input: "clip_by_global_norm/truediv" + input: "clip_by_global_norm/truediv_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "clip_by_global_norm/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "clip_by_global_norm/mul" + op: "Mul" + input: "clip_by_global_norm/mul/x" + input: "clip_by_global_norm/Minimum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "clip_by_global_norm/IsFinite" + op: "IsFinite" + input: "global_norm/global_norm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "clip_by_global_norm/Const_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: nan + } + } + } +} +node { + name: "clip_by_global_norm/Select" + op: "Select" + input: "clip_by_global_norm/IsFinite" + input: "clip_by_global_norm/mul" + input: "clip_by_global_norm/Const_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_1" + op: "Mul" + input: "gradients/bert/embeddings/GatherV2_grad/Reshape" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/GatherV2_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_0" + op: "Identity" + input: "clip_by_global_norm/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/GatherV2_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: 
"clip_by_global_norm/mul_2" + op: "Mul" + input: "gradients/bert/embeddings/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_1" + op: "Identity" + input: "clip_by_global_norm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_3" + op: "Mul" + input: "gradients/bert/embeddings/Slice_grad/Pad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/Slice_grad/Pad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_2" + op: "Identity" + input: "clip_by_global_norm/mul_3" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/Slice_grad/Pad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_4" + op: "Mul" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_3" + op: "Identity" + input: "clip_by_global_norm/mul_4" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_5" + op: "Mul" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_4" + op: "Identity" + input: "clip_by_global_norm/mul_5" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_6" + op: "Mul" + input: 
"gradients/bert/encoder/layer_0/attention/self/query/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_5" + op: "Identity" + input: "clip_by_global_norm/mul_6" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_7" + op: "Mul" + input: "gradients/bert/encoder/layer_0/attention/self/query/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_6" + op: "Identity" + input: "clip_by_global_norm/mul_7" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_8" + op: "Mul" + input: "gradients/bert/encoder/layer_0/attention/self/key/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_7" + op: "Identity" + input: "clip_by_global_norm/mul_8" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_9" + op: "Mul" + input: "gradients/bert/encoder/layer_0/attention/self/key/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_8" + op: "Identity" + input: "clip_by_global_norm/mul_9" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + 
value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_10" + op: "Mul" + input: "gradients/bert/encoder/layer_0/attention/self/value/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_9" + op: "Identity" + input: "clip_by_global_norm/mul_10" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_11" + op: "Mul" + input: "gradients/bert/encoder/layer_0/attention/self/value/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_10" + op: "Identity" + input: "clip_by_global_norm/mul_11" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_12" + op: "Mul" + input: "gradients/bert/encoder/layer_0/attention/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_11" + op: "Identity" + input: "clip_by_global_norm/mul_12" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_13" + op: "Mul" + input: "gradients/bert/encoder/layer_0/attention/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_12" + op: "Identity" + input: "clip_by_global_norm/mul_13" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + 
key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_14" + op: "Mul" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_13" + op: "Identity" + input: "clip_by_global_norm/mul_14" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_15" + op: "Mul" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_14" + op: "Identity" + input: "clip_by_global_norm/mul_15" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_16" + op: "Mul" + input: "gradients/bert/encoder/layer_0/intermediate/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_15" + op: "Identity" + input: "clip_by_global_norm/mul_16" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_17" + op: "Mul" + input: "gradients/bert/encoder/layer_0/intermediate/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + 
name: "clip_by_global_norm/clip_by_global_norm/_16" + op: "Identity" + input: "clip_by_global_norm/mul_17" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_18" + op: "Mul" + input: "gradients/bert/encoder/layer_0/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_17" + op: "Identity" + input: "clip_by_global_norm/mul_18" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_19" + op: "Mul" + input: "gradients/bert/encoder/layer_0/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_18" + op: "Identity" + input: "clip_by_global_norm/mul_19" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_20" + op: "Mul" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_19" + op: "Identity" + input: "clip_by_global_norm/mul_20" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_21" + op: "Mul" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + 
list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_20" + op: "Identity" + input: "clip_by_global_norm/mul_21" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_22" + op: "Mul" + input: "gradients/bert/encoder/layer_1/attention/self/query/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_21" + op: "Identity" + input: "clip_by_global_norm/mul_22" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_23" + op: "Mul" + input: "gradients/bert/encoder/layer_1/attention/self/query/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_22" + op: "Identity" + input: "clip_by_global_norm/mul_23" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_24" + op: "Mul" + input: "gradients/bert/encoder/layer_1/attention/self/key/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_23" + op: "Identity" + input: "clip_by_global_norm/mul_24" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_25" + op: "Mul" + input: "gradients/bert/encoder/layer_1/attention/self/key/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + 
list { + s: "loc:@gradients/bert/encoder/layer_1/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_24" + op: "Identity" + input: "clip_by_global_norm/mul_25" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_26" + op: "Mul" + input: "gradients/bert/encoder/layer_1/attention/self/value/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_25" + op: "Identity" + input: "clip_by_global_norm/mul_26" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_27" + op: "Mul" + input: "gradients/bert/encoder/layer_1/attention/self/value/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_26" + op: "Identity" + input: "clip_by_global_norm/mul_27" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_28" + op: "Mul" + input: "gradients/bert/encoder/layer_1/attention/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_27" + op: "Identity" + input: "clip_by_global_norm/mul_28" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_29" + op: "Mul" + input: 
"gradients/bert/encoder/layer_1/attention/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_28" + op: "Identity" + input: "clip_by_global_norm/mul_29" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_30" + op: "Mul" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_29" + op: "Identity" + input: "clip_by_global_norm/mul_30" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_31" + op: "Mul" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_30" + op: "Identity" + input: "clip_by_global_norm/mul_31" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_32" + op: "Mul" + input: "gradients/bert/encoder/layer_1/intermediate/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_31" + op: "Identity" + input: "clip_by_global_norm/mul_32" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_33" + op: "Mul" + input: "gradients/bert/encoder/layer_1/intermediate/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_32" + op: "Identity" + input: "clip_by_global_norm/mul_33" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_34" + op: "Mul" + input: "gradients/bert/encoder/layer_1/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_33" + op: "Identity" + input: "clip_by_global_norm/mul_34" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_35" + op: "Mul" + input: "gradients/bert/encoder/layer_1/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_34" + op: "Identity" + input: "clip_by_global_norm/mul_35" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_36" + op: "Mul" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_35" + op: "Identity" + input: "clip_by_global_norm/mul_36" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_37" + op: "Mul" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_36" + op: "Identity" + input: "clip_by_global_norm/mul_37" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_38" + op: "Mul" + input: "gradients/bert/encoder/layer_2/attention/self/query/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_37" + op: "Identity" + input: "clip_by_global_norm/mul_38" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_39" + op: "Mul" + input: "gradients/bert/encoder/layer_2/attention/self/query/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_38" + op: "Identity" + input: "clip_by_global_norm/mul_39" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_40" + op: "Mul" + input: "gradients/bert/encoder/layer_2/attention/self/key/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_39" + op: "Identity" + input: 
"clip_by_global_norm/mul_40" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_41" + op: "Mul" + input: "gradients/bert/encoder/layer_2/attention/self/key/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_40" + op: "Identity" + input: "clip_by_global_norm/mul_41" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_42" + op: "Mul" + input: "gradients/bert/encoder/layer_2/attention/self/value/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_41" + op: "Identity" + input: "clip_by_global_norm/mul_42" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_43" + op: "Mul" + input: "gradients/bert/encoder/layer_2/attention/self/value/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_42" + op: "Identity" + input: "clip_by_global_norm/mul_43" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_44" + op: "Mul" + input: "gradients/bert/encoder/layer_2/attention/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + 
dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_43" + op: "Identity" + input: "clip_by_global_norm/mul_44" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_45" + op: "Mul" + input: "gradients/bert/encoder/layer_2/attention/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_44" + op: "Identity" + input: "clip_by_global_norm/mul_45" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_46" + op: "Mul" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_45" + op: "Identity" + input: "clip_by_global_norm/mul_46" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_47" + op: "Mul" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_46" + op: "Identity" + input: "clip_by_global_norm/mul_47" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_48" + op: "Mul" + input: "gradients/bert/encoder/layer_2/intermediate/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: 
"_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_47" + op: "Identity" + input: "clip_by_global_norm/mul_48" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_49" + op: "Mul" + input: "gradients/bert/encoder/layer_2/intermediate/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_48" + op: "Identity" + input: "clip_by_global_norm/mul_49" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_50" + op: "Mul" + input: "gradients/bert/encoder/layer_2/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_49" + op: "Identity" + input: "clip_by_global_norm/mul_50" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_51" + op: "Mul" + input: "gradients/bert/encoder/layer_2/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_50" + op: "Identity" + input: "clip_by_global_norm/mul_51" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_52" + op: "Mul" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Reshape" + 
input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_51" + op: "Identity" + input: "clip_by_global_norm/mul_52" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_53" + op: "Mul" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_52" + op: "Identity" + input: "clip_by_global_norm/mul_53" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_54" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/self/query/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_53" + op: "Identity" + input: "clip_by_global_norm/mul_54" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_55" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/self/query/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_54" + op: "Identity" + input: "clip_by_global_norm/mul_55" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: 
"clip_by_global_norm/mul_56" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/self/key/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_55" + op: "Identity" + input: "clip_by_global_norm/mul_56" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_57" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/self/key/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_56" + op: "Identity" + input: "clip_by_global_norm/mul_57" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_58" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/self/value/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_57" + op: "Identity" + input: "clip_by_global_norm/mul_58" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_59" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/self/value/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_58" + op: "Identity" + input: "clip_by_global_norm/mul_59" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@gradients/bert/encoder/layer_3/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_60" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_59" + op: "Identity" + input: "clip_by_global_norm/mul_60" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_61" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_60" + op: "Identity" + input: "clip_by_global_norm/mul_61" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_62" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_61" + op: "Identity" + input: "clip_by_global_norm/mul_62" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_63" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: 
"clip_by_global_norm/clip_by_global_norm/_62" + op: "Identity" + input: "clip_by_global_norm/mul_63" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_64" + op: "Mul" + input: "gradients/bert/encoder/layer_3/intermediate/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_63" + op: "Identity" + input: "clip_by_global_norm/mul_64" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_65" + op: "Mul" + input: "gradients/bert/encoder/layer_3/intermediate/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_64" + op: "Identity" + input: "clip_by_global_norm/mul_65" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_66" + op: "Mul" + input: "gradients/bert/encoder/layer_3/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_65" + op: "Identity" + input: "clip_by_global_norm/mul_66" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_67" + op: "Mul" + input: "gradients/bert/encoder/layer_3/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } 
+ attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_66" + op: "Identity" + input: "clip_by_global_norm/mul_67" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_68" + op: "Mul" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_67" + op: "Identity" + input: "clip_by_global_norm/mul_68" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_69" + op: "Mul" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_68" + op: "Identity" + input: "clip_by_global_norm/mul_69" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_70" + op: "Mul" + input: "gradients/bert/encoder/layer_4/attention/self/query/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_69" + op: "Identity" + input: "clip_by_global_norm/mul_70" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_71" + op: "Mul" + input: "gradients/bert/encoder/layer_4/attention/self/query/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + 
list { + s: "loc:@gradients/bert/encoder/layer_4/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_70" + op: "Identity" + input: "clip_by_global_norm/mul_71" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_72" + op: "Mul" + input: "gradients/bert/encoder/layer_4/attention/self/key/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_71" + op: "Identity" + input: "clip_by_global_norm/mul_72" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_73" + op: "Mul" + input: "gradients/bert/encoder/layer_4/attention/self/key/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_72" + op: "Identity" + input: "clip_by_global_norm/mul_73" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_74" + op: "Mul" + input: "gradients/bert/encoder/layer_4/attention/self/value/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_73" + op: "Identity" + input: "clip_by_global_norm/mul_74" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_75" + op: "Mul" + input: 
"gradients/bert/encoder/layer_4/attention/self/value/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_74" + op: "Identity" + input: "clip_by_global_norm/mul_75" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_76" + op: "Mul" + input: "gradients/bert/encoder/layer_4/attention/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_75" + op: "Identity" + input: "clip_by_global_norm/mul_76" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_77" + op: "Mul" + input: "gradients/bert/encoder/layer_4/attention/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_76" + op: "Identity" + input: "clip_by_global_norm/mul_77" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_78" + op: "Mul" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_77" + op: "Identity" + input: "clip_by_global_norm/mul_78" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + 
key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_79" + op: "Mul" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_78" + op: "Identity" + input: "clip_by_global_norm/mul_79" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_80" + op: "Mul" + input: "gradients/bert/encoder/layer_4/intermediate/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_79" + op: "Identity" + input: "clip_by_global_norm/mul_80" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_81" + op: "Mul" + input: "gradients/bert/encoder/layer_4/intermediate/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_80" + op: "Identity" + input: "clip_by_global_norm/mul_81" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_82" + op: "Mul" + input: "gradients/bert/encoder/layer_4/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_81" + op: "Identity" + input: "clip_by_global_norm/mul_82" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + 
key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_83" + op: "Mul" + input: "gradients/bert/encoder/layer_4/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_82" + op: "Identity" + input: "clip_by_global_norm/mul_83" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_84" + op: "Mul" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_83" + op: "Identity" + input: "clip_by_global_norm/mul_84" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_85" + op: "Mul" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_84" + op: "Identity" + input: "clip_by_global_norm/mul_85" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_86" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/self/query/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_85" + op: "Identity" + input: 
"clip_by_global_norm/mul_86" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_87" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/self/query/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_86" + op: "Identity" + input: "clip_by_global_norm/mul_87" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_88" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/self/key/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_87" + op: "Identity" + input: "clip_by_global_norm/mul_88" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_89" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/self/key/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_88" + op: "Identity" + input: "clip_by_global_norm/mul_89" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_90" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/self/value/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + 
size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_89" + op: "Identity" + input: "clip_by_global_norm/mul_90" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_91" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/self/value/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_90" + op: "Identity" + input: "clip_by_global_norm/mul_91" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_92" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_91" + op: "Identity" + input: "clip_by_global_norm/mul_92" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_93" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_92" + op: "Identity" + input: "clip_by_global_norm/mul_93" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_94" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + 
value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_93" + op: "Identity" + input: "clip_by_global_norm/mul_94" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_95" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_94" + op: "Identity" + input: "clip_by_global_norm/mul_95" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_96" + op: "Mul" + input: "gradients/bert/encoder/layer_5/intermediate/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_95" + op: "Identity" + input: "clip_by_global_norm/mul_96" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_97" + op: "Mul" + input: "gradients/bert/encoder/layer_5/intermediate/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_96" + op: "Identity" + input: "clip_by_global_norm/mul_97" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_98" + op: "Mul" + input: 
"gradients/bert/encoder/layer_5/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_97" + op: "Identity" + input: "clip_by_global_norm/mul_98" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_99" + op: "Mul" + input: "gradients/bert/encoder/layer_5/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_98" + op: "Identity" + input: "clip_by_global_norm/mul_99" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_100" + op: "Mul" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_99" + op: "Identity" + input: "clip_by_global_norm/mul_100" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_101" + op: "Mul" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_100" + op: "Identity" + input: "clip_by_global_norm/mul_101" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + 
} + } +} +node { + name: "clip_by_global_norm/mul_102" + op: "Mul" + input: "gradients/bert/encoder/layer_6/attention/self/query/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_101" + op: "Identity" + input: "clip_by_global_norm/mul_102" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_103" + op: "Mul" + input: "gradients/bert/encoder/layer_6/attention/self/query/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_102" + op: "Identity" + input: "clip_by_global_norm/mul_103" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_104" + op: "Mul" + input: "gradients/bert/encoder/layer_6/attention/self/key/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_103" + op: "Identity" + input: "clip_by_global_norm/mul_104" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_105" + op: "Mul" + input: "gradients/bert/encoder/layer_6/attention/self/key/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_104" + op: "Identity" + input: "clip_by_global_norm/mul_105" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@gradients/bert/encoder/layer_6/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_106" + op: "Mul" + input: "gradients/bert/encoder/layer_6/attention/self/value/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_105" + op: "Identity" + input: "clip_by_global_norm/mul_106" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_107" + op: "Mul" + input: "gradients/bert/encoder/layer_6/attention/self/value/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_106" + op: "Identity" + input: "clip_by_global_norm/mul_107" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_108" + op: "Mul" + input: "gradients/bert/encoder/layer_6/attention/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_107" + op: "Identity" + input: "clip_by_global_norm/mul_108" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_109" + op: "Mul" + input: "gradients/bert/encoder/layer_6/attention/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: 
"clip_by_global_norm/clip_by_global_norm/_108" + op: "Identity" + input: "clip_by_global_norm/mul_109" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_110" + op: "Mul" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_109" + op: "Identity" + input: "clip_by_global_norm/mul_110" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_111" + op: "Mul" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_110" + op: "Identity" + input: "clip_by_global_norm/mul_111" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_112" + op: "Mul" + input: "gradients/bert/encoder/layer_6/intermediate/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_111" + op: "Identity" + input: "clip_by_global_norm/mul_112" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_113" + op: "Mul" + input: "gradients/bert/encoder/layer_6/intermediate/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@gradients/bert/encoder/layer_6/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_112" + op: "Identity" + input: "clip_by_global_norm/mul_113" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_114" + op: "Mul" + input: "gradients/bert/encoder/layer_6/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_113" + op: "Identity" + input: "clip_by_global_norm/mul_114" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_115" + op: "Mul" + input: "gradients/bert/encoder/layer_6/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_114" + op: "Identity" + input: "clip_by_global_norm/mul_115" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_116" + op: "Mul" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_115" + op: "Identity" + input: "clip_by_global_norm/mul_116" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_117" + op: "Mul" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + 
value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_116" + op: "Identity" + input: "clip_by_global_norm/mul_117" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_118" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/self/query/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_117" + op: "Identity" + input: "clip_by_global_norm/mul_118" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_119" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/self/query/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_118" + op: "Identity" + input: "clip_by_global_norm/mul_119" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_120" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/self/key/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_119" + op: "Identity" + input: "clip_by_global_norm/mul_120" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: 
"clip_by_global_norm/mul_121" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/self/key/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_120" + op: "Identity" + input: "clip_by_global_norm/mul_121" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_122" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/self/value/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_121" + op: "Identity" + input: "clip_by_global_norm/mul_122" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_123" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/self/value/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_122" + op: "Identity" + input: "clip_by_global_norm/mul_123" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_124" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_123" + op: "Identity" + input: "clip_by_global_norm/mul_124" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@gradients/bert/encoder/layer_7/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_125" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_124" + op: "Identity" + input: "clip_by_global_norm/mul_125" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_126" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_125" + op: "Identity" + input: "clip_by_global_norm/mul_126" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_127" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_126" + op: "Identity" + input: "clip_by_global_norm/mul_127" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_128" + op: "Mul" + input: "gradients/bert/encoder/layer_7/intermediate/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: 
"clip_by_global_norm/clip_by_global_norm/_127" + op: "Identity" + input: "clip_by_global_norm/mul_128" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_129" + op: "Mul" + input: "gradients/bert/encoder/layer_7/intermediate/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_128" + op: "Identity" + input: "clip_by_global_norm/mul_129" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_130" + op: "Mul" + input: "gradients/bert/encoder/layer_7/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_129" + op: "Identity" + input: "clip_by_global_norm/mul_130" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_131" + op: "Mul" + input: "gradients/bert/encoder/layer_7/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_130" + op: "Identity" + input: "clip_by_global_norm/mul_131" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_132" + op: "Mul" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_131" + op: "Identity" + input: "clip_by_global_norm/mul_132" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_133" + op: "Mul" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_132" + op: "Identity" + input: "clip_by_global_norm/mul_133" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_134" + op: "Mul" + input: "gradients/bert/encoder/layer_8/attention/self/query/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_133" + op: "Identity" + input: "clip_by_global_norm/mul_134" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_135" + op: "Mul" + input: "gradients/bert/encoder/layer_8/attention/self/query/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_134" + op: "Identity" + input: "clip_by_global_norm/mul_135" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_136" + op: "Mul" + input: "gradients/bert/encoder/layer_8/attention/self/key/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { 
+ list { + s: "loc:@gradients/bert/encoder/layer_8/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_135" + op: "Identity" + input: "clip_by_global_norm/mul_136" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_137" + op: "Mul" + input: "gradients/bert/encoder/layer_8/attention/self/key/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_136" + op: "Identity" + input: "clip_by_global_norm/mul_137" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_138" + op: "Mul" + input: "gradients/bert/encoder/layer_8/attention/self/value/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_137" + op: "Identity" + input: "clip_by_global_norm/mul_138" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_139" + op: "Mul" + input: "gradients/bert/encoder/layer_8/attention/self/value/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_138" + op: "Identity" + input: "clip_by_global_norm/mul_139" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_140" + op: "Mul" + input: 
"gradients/bert/encoder/layer_8/attention/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_139" + op: "Identity" + input: "clip_by_global_norm/mul_140" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_141" + op: "Mul" + input: "gradients/bert/encoder/layer_8/attention/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_140" + op: "Identity" + input: "clip_by_global_norm/mul_141" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_142" + op: "Mul" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_141" + op: "Identity" + input: "clip_by_global_norm/mul_142" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_143" + op: "Mul" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_142" + op: "Identity" + input: "clip_by_global_norm/mul_143" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_144" + op: "Mul" + input: "gradients/bert/encoder/layer_8/intermediate/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_143" + op: "Identity" + input: "clip_by_global_norm/mul_144" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_145" + op: "Mul" + input: "gradients/bert/encoder/layer_8/intermediate/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_144" + op: "Identity" + input: "clip_by_global_norm/mul_145" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_146" + op: "Mul" + input: "gradients/bert/encoder/layer_8/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_145" + op: "Identity" + input: "clip_by_global_norm/mul_146" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_147" + op: "Mul" + input: "gradients/bert/encoder/layer_8/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_146" + op: "Identity" + input: 
"clip_by_global_norm/mul_147" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_148" + op: "Mul" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_147" + op: "Identity" + input: "clip_by_global_norm/mul_148" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_149" + op: "Mul" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_148" + op: "Identity" + input: "clip_by_global_norm/mul_149" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_150" + op: "Mul" + input: "gradients/bert/encoder/layer_9/attention/self/query/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_149" + op: "Identity" + input: "clip_by_global_norm/mul_150" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_151" + op: "Mul" + input: "gradients/bert/encoder/layer_9/attention/self/query/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + 
size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_150" + op: "Identity" + input: "clip_by_global_norm/mul_151" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_152" + op: "Mul" + input: "gradients/bert/encoder/layer_9/attention/self/key/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_151" + op: "Identity" + input: "clip_by_global_norm/mul_152" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_153" + op: "Mul" + input: "gradients/bert/encoder/layer_9/attention/self/key/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_152" + op: "Identity" + input: "clip_by_global_norm/mul_153" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_154" + op: "Mul" + input: "gradients/bert/encoder/layer_9/attention/self/value/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_153" + op: "Identity" + input: "clip_by_global_norm/mul_154" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_155" + op: "Mul" + input: "gradients/bert/encoder/layer_9/attention/self/value/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@gradients/bert/encoder/layer_9/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_154" + op: "Identity" + input: "clip_by_global_norm/mul_155" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_156" + op: "Mul" + input: "gradients/bert/encoder/layer_9/attention/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_155" + op: "Identity" + input: "clip_by_global_norm/mul_156" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_157" + op: "Mul" + input: "gradients/bert/encoder/layer_9/attention/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_156" + op: "Identity" + input: "clip_by_global_norm/mul_157" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_158" + op: "Mul" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_157" + op: "Identity" + input: "clip_by_global_norm/mul_158" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_159" + op: "Mul" + input: 
"gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_158" + op: "Identity" + input: "clip_by_global_norm/mul_159" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_160" + op: "Mul" + input: "gradients/bert/encoder/layer_9/intermediate/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_159" + op: "Identity" + input: "clip_by_global_norm/mul_160" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_161" + op: "Mul" + input: "gradients/bert/encoder/layer_9/intermediate/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_160" + op: "Identity" + input: "clip_by_global_norm/mul_161" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_162" + op: "Mul" + input: "gradients/bert/encoder/layer_9/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_161" + op: "Identity" + input: "clip_by_global_norm/mul_162" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_163" + op: "Mul" + input: "gradients/bert/encoder/layer_9/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_162" + op: "Identity" + input: "clip_by_global_norm/mul_163" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_164" + op: "Mul" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_163" + op: "Identity" + input: "clip_by_global_norm/mul_164" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_165" + op: "Mul" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_164" + op: "Identity" + input: "clip_by_global_norm/mul_165" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_166" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/self/query/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_165" + op: "Identity" + input: "clip_by_global_norm/mul_166" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@gradients/bert/encoder/layer_10/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_167" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/self/query/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_166" + op: "Identity" + input: "clip_by_global_norm/mul_167" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_168" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/self/key/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_167" + op: "Identity" + input: "clip_by_global_norm/mul_168" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_169" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/self/key/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_168" + op: "Identity" + input: "clip_by_global_norm/mul_169" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_170" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/self/value/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: 
"clip_by_global_norm/clip_by_global_norm/_169" + op: "Identity" + input: "clip_by_global_norm/mul_170" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_171" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/self/value/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_170" + op: "Identity" + input: "clip_by_global_norm/mul_171" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_172" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_171" + op: "Identity" + input: "clip_by_global_norm/mul_172" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_173" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_172" + op: "Identity" + input: "clip_by_global_norm/mul_173" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_174" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_173" + op: "Identity" + input: "clip_by_global_norm/mul_174" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_175" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_174" + op: "Identity" + input: "clip_by_global_norm/mul_175" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_176" + op: "Mul" + input: "gradients/bert/encoder/layer_10/intermediate/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_175" + op: "Identity" + input: "clip_by_global_norm/mul_176" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_177" + op: "Mul" + input: "gradients/bert/encoder/layer_10/intermediate/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_176" + op: "Identity" + input: "clip_by_global_norm/mul_177" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_178" + op: "Mul" + input: 
"gradients/bert/encoder/layer_10/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_177" + op: "Identity" + input: "clip_by_global_norm/mul_178" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_179" + op: "Mul" + input: "gradients/bert/encoder/layer_10/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_178" + op: "Identity" + input: "clip_by_global_norm/mul_179" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_180" + op: "Mul" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_179" + op: "Identity" + input: "clip_by_global_norm/mul_180" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_181" + op: "Mul" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_180" + op: "Identity" + input: "clip_by_global_norm/mul_181" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 
768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_182" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/self/query/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_181" + op: "Identity" + input: "clip_by_global_norm/mul_182" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_183" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/self/query/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_182" + op: "Identity" + input: "clip_by_global_norm/mul_183" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_184" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/self/key/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_183" + op: "Identity" + input: "clip_by_global_norm/mul_184" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_185" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/self/key/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_184" + op: "Identity" + input: "clip_by_global_norm/mul_185" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@gradients/bert/encoder/layer_11/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_186" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/self/value/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_185" + op: "Identity" + input: "clip_by_global_norm/mul_186" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_187" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/self/value/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_186" + op: "Identity" + input: "clip_by_global_norm/mul_187" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_188" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_187" + op: "Identity" + input: "clip_by_global_norm/mul_188" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_189" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: 
"clip_by_global_norm/clip_by_global_norm/_188" + op: "Identity" + input: "clip_by_global_norm/mul_189" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_190" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_189" + op: "Identity" + input: "clip_by_global_norm/mul_190" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_191" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_190" + op: "Identity" + input: "clip_by_global_norm/mul_191" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_192" + op: "Mul" + input: "gradients/bert/encoder/layer_11/intermediate/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_191" + op: "Identity" + input: "clip_by_global_norm/mul_192" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_193" + op: "Mul" + input: "gradients/bert/encoder/layer_11/intermediate/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@gradients/bert/encoder/layer_11/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_192" + op: "Identity" + input: "clip_by_global_norm/mul_193" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_194" + op: "Mul" + input: "gradients/bert/encoder/layer_11/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_193" + op: "Identity" + input: "clip_by_global_norm/mul_194" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_195" + op: "Mul" + input: "gradients/bert/encoder/layer_11/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_194" + op: "Identity" + input: "clip_by_global_norm/mul_195" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_196" + op: "Mul" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_195" + op: "Identity" + input: "clip_by_global_norm/mul_196" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_197" + op: "Mul" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/Select" + attr { + 
key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_196" + op: "Identity" + input: "clip_by_global_norm/mul_197" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_198" + op: "Mul" + input: "gradients/bert/pooler/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/pooler/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_197" + op: "Identity" + input: "clip_by_global_norm/mul_198" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/pooler/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_199" + op: "Mul" + input: "gradients/bert/pooler/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/pooler/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_198" + op: "Identity" + input: "clip_by_global_norm/mul_199" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/pooler/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_200" + op: "Mul" + input: "gradients/loss/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/loss/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_199" + op: "Identity" + input: "clip_by_global_norm/mul_200" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/loss/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_201" + op: "Mul" + input: "gradients/loss/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@gradients/loss/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_200" + op: "Identity" + input: "clip_by_global_norm/mul_201" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/loss/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "bert/embeddings/word_embeddings/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\210R\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/embeddings/word_embeddings/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/word_embeddings/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/embeddings/word_embeddings/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/embeddings/word_embeddings/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/embeddings/word_embeddings/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/word_embeddings/adam_m/Assign" + op: "Assign" + input: "bert/embeddings/word_embeddings/adam_m" + input: "bert/embeddings/word_embeddings/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/word_embeddings/adam_m/read" + op: "Identity" + input: "bert/embeddings/word_embeddings/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" 
+ value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/word_embeddings/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\210R\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/embeddings/word_embeddings/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/word_embeddings/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/embeddings/word_embeddings/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/embeddings/word_embeddings/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/embeddings/word_embeddings/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/word_embeddings/adam_v/Assign" + op: "Assign" + input: "bert/embeddings/word_embeddings/adam_v" + input: "bert/embeddings/word_embeddings/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/word_embeddings/adam_v/read" + op: "Identity" + input: "bert/embeddings/word_embeddings/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_3/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + 
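+# The Mul_3 ... Assign_201 nodes that follow appear to implement one Adam-with-weight-decay
+# update step for bert/embeddings/word_embeddings (shape [21128, 768]), which matches the
+# AdamWeightDecayOptimizer in BERT's optimization.py. Reading off the constants in this excerpt:
+#   g     = UnsortedSegmentSum(clip_by_global_norm/_0, ...)   # clipped GatherV2 gradient scattered
+#                                                              # back onto the full embedding table
+#   m_new = 0.9   * m + 0.1   * g                              # Mul_3, Mul_4, add_1
+#   v_new = 0.999 * v + 0.001 * g^2                            # Mul_5, Square, Mul_6, add_2
+#   upd   = m_new / (sqrt(v_new) + 1e-6) + 0.01 * w            # Sqrt, add_3, truediv_1, mul_7, add_4
+#   w    -= lr * upd                                           # mul_8, sub_1, Assign_199
+# Here w is the embedding table, m/v are the adam_m/adam_v slots zero-initialized above
+# (Assign_200/Assign_201 write the new m and v back), and lr is the scalar input named "add",
+# which appears to carry the scheduled learning rate computed earlier in the graph.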
attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_3" + op: "Mul" + input: "Mul_3/x" + input: "bert/embeddings/word_embeddings/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_4/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_4/strided_slice/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "Mul_4/strided_slice/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "Mul_4/strided_slice/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "Mul_4/strided_slice" + op: "StridedSlice" + input: "gradients/bert/embeddings/GatherV2_grad/Cast" + input: "Mul_4/strided_slice/stack" + input: "Mul_4/strided_slice/stack_1" + input: "Mul_4/strided_slice/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 1 + } + } +} +node { + name: "Mul_4/y" + op: "UnsortedSegmentSum" + input: "clip_by_global_norm/clip_by_global_norm/_0" + input: "gradients/bert/embeddings/GatherV2_grad/Reshape_1" + input: "Mul_4/strided_slice" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tindices" + value { + type: DT_INT32 + } + } + attr { + key: "Tnumsegments" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_4" + op: "Mul" + input: "Mul_4/x" + input: "Mul_4/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_1" + op: "Add" + input: "Mul_3" + input: "Mul_4" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 21128 + } + dim { + 
size: 768 + } + } + } + } + } +} +node { + name: "Mul_5/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_5" + op: "Mul" + input: "Mul_5/x" + input: "bert/embeddings/word_embeddings/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square/strided_slice/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "Square/strided_slice/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "Square/strided_slice/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "Square/strided_slice" + op: "StridedSlice" + input: "gradients/bert/embeddings/GatherV2_grad/Cast" + input: "Square/strided_slice/stack" + input: "Square/strided_slice/stack_1" + input: "Square/strided_slice/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 1 + } + } +} +node { + name: "Square/x" + op: "UnsortedSegmentSum" + input: "clip_by_global_norm/clip_by_global_norm/_0" + input: "gradients/bert/embeddings/GatherV2_grad/Reshape_1" + input: "Square/strided_slice" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tindices" + value { + type: DT_INT32 + } + } + attr { + key: "Tnumsegments" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square" + op: "Square" + input: "Square/x" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_6/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_6" + op: "Mul" + input: "Mul_6/x" + input: "Square" + 
attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_2" + op: "Add" + input: "Mul_5" + input: "Mul_6" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt" + op: "Sqrt" + input: "add_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_3/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_3" + op: "Add" + input: "Sqrt" + input: "add_3/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_1" + op: "RealDiv" + input: "add_1" + input: "add_3" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_7/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_7" + op: "Mul" + input: "mul_7/x" + input: "bert/embeddings/word_embeddings/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_4" + op: "Add" + input: "truediv_1" + input: "mul_7" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_8" + op: "Mul" + input: "add" + input: "add_4" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_1" + op: "Sub" + input: "bert/embeddings/word_embeddings/read" + input: "mul_8" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_199" + op: "Assign" + input: "bert/embeddings/word_embeddings" + input: "sub_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_200" + op: "Assign" + input: "bert/embeddings/word_embeddings/adam_m" + input: "add_1" + attr { + key: "T" + 
value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_201" + op: "Assign" + input: "bert/embeddings/word_embeddings/adam_v" + input: "add_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\002\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/embeddings/token_type_embeddings/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/embeddings/token_type_embeddings/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/adam_m/Assign" + op: "Assign" + input: "bert/embeddings/token_type_embeddings/adam_m" + input: "bert/embeddings/token_type_embeddings/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_m" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/adam_m/read" + op: "Identity" + input: "bert/embeddings/token_type_embeddings/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\002\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/embeddings/token_type_embeddings/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/embeddings/token_type_embeddings/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/adam_v/Assign" + op: "Assign" + input: "bert/embeddings/token_type_embeddings/adam_v" + input: "bert/embeddings/token_type_embeddings/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: 
"bert/embeddings/token_type_embeddings/adam_v/read" + op: "Identity" + input: "bert/embeddings/token_type_embeddings/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_9/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_9" + op: "Mul" + input: "Mul_9/x" + input: "bert/embeddings/token_type_embeddings/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_10/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_10" + op: "Mul" + input: "Mul_10/x" + input: "clip_by_global_norm/clip_by_global_norm/_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_5" + op: "Add" + input: "Mul_9" + input: "Mul_10" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_11/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_11" + op: "Mul" + input: "Mul_11/x" + input: "bert/embeddings/token_type_embeddings/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_1" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_12/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_12" + op: "Mul" + input: "Mul_12/x" + input: "Square_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_6" + op: "Add" + input: "Mul_11" + input: "Mul_12" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + 
dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_1" + op: "Sqrt" + input: "add_6" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_7/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_7" + op: "Add" + input: "Sqrt_1" + input: "add_7/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_2" + op: "RealDiv" + input: "add_5" + input: "add_7" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_13/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_13" + op: "Mul" + input: "mul_13/x" + input: "bert/embeddings/token_type_embeddings/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_8" + op: "Add" + input: "truediv_2" + input: "mul_13" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_14" + op: "Mul" + input: "add" + input: "add_8" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_2" + op: "Sub" + input: "bert/embeddings/token_type_embeddings/read" + input: "mul_14" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_202" + op: "Assign" + input: "bert/embeddings/token_type_embeddings" + input: "sub_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_203" + op: "Assign" + input: "bert/embeddings/token_type_embeddings/adam_m" + input: "add_5" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } 
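+# The same update pattern continues below for bert/embeddings/position_embeddings
+# ([512, 768], gradient clip_by_global_norm/_2). Unlike word_embeddings, whose sparse
+# GatherV2 gradient had to be scattered with UnsortedSegmentSum, the token_type and
+# position tables receive their clipped gradients (_1 and _2) as dense tensors, so the
+# surrounding arithmetic is otherwise identical.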
+} +node { + name: "Assign_204" + op: "Assign" + input: "bert/embeddings/token_type_embeddings/adam_v" + input: "add_6" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/position_embeddings/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\002\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/embeddings/position_embeddings/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/position_embeddings/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/embeddings/position_embeddings/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/embeddings/position_embeddings/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/embeddings/position_embeddings/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/position_embeddings/adam_m/Assign" + op: "Assign" + input: "bert/embeddings/position_embeddings/adam_m" + input: "bert/embeddings/position_embeddings/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/position_embeddings/adam_m/read" + op: "Identity" + input: "bert/embeddings/position_embeddings/adam_m" + attr { + key: "T" + value { + 
type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/position_embeddings/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\002\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/embeddings/position_embeddings/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/position_embeddings/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/embeddings/position_embeddings/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/embeddings/position_embeddings/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/embeddings/position_embeddings/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/position_embeddings/adam_v/Assign" + op: "Assign" + input: "bert/embeddings/position_embeddings/adam_v" + input: "bert/embeddings/position_embeddings/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/position_embeddings/adam_v/read" + op: "Identity" + input: "bert/embeddings/position_embeddings/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + 
name: "Mul_15/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_15" + op: "Mul" + input: "Mul_15/x" + input: "bert/embeddings/position_embeddings/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_16/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_16" + op: "Mul" + input: "Mul_16/x" + input: "clip_by_global_norm/clip_by_global_norm/_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_9" + op: "Add" + input: "Mul_15" + input: "Mul_16" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_17/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_17" + op: "Mul" + input: "Mul_17/x" + input: "bert/embeddings/position_embeddings/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_2" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_18/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_18" + op: "Mul" + input: "Mul_18/x" + input: "Square_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_10" + op: "Add" + input: "Mul_17" + input: "Mul_18" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_2" + op: "Sqrt" + input: "add_10" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_11/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: 
"dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_11" + op: "Add" + input: "Sqrt_2" + input: "add_11/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_3" + op: "RealDiv" + input: "add_9" + input: "add_11" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_19/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_19" + op: "Mul" + input: "mul_19/x" + input: "bert/embeddings/position_embeddings/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_12" + op: "Add" + input: "truediv_3" + input: "mul_19" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_20" + op: "Mul" + input: "add" + input: "add_12" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_3" + op: "Sub" + input: "bert/embeddings/position_embeddings/read" + input: "mul_20" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_205" + op: "Assign" + input: "bert/embeddings/position_embeddings" + input: "sub_3" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_206" + op: "Assign" + input: "bert/embeddings/position_embeddings/adam_m" + input: "add_9" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_207" + op: "Assign" + input: "bert/embeddings/position_embeddings/adam_v" + input: "add_10" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + 
} + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/embeddings/LayerNorm/beta/adam_m" + input: "bert/embeddings/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/embeddings/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/embeddings/LayerNorm/beta/adam_v" + input: "bert/embeddings/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + 
value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/embeddings/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_21/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_21" + op: "Mul" + input: "Mul_21/x" + input: "bert/embeddings/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_22/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_22" + op: "Mul" + input: "Mul_22/x" + input: "clip_by_global_norm/clip_by_global_norm/_3" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_13" + op: "Add" + input: "Mul_21" + input: "Mul_22" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_23/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_23" + op: "Mul" + input: "Mul_23/x" + input: "bert/embeddings/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_3" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_3" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_24/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_24" + op: "Mul" + input: "Mul_24/x" + input: "Square_3" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_14" + op: "Add" + input: "Mul_23" + input: "Mul_24" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + 
dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_3" + op: "Sqrt" + input: "add_14" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_15/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_15" + op: "Add" + input: "Sqrt_3" + input: "add_15/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_4" + op: "RealDiv" + input: "add_13" + input: "add_15" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_25" + op: "Mul" + input: "add" + input: "truediv_4" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_4" + op: "Sub" + input: "bert/embeddings/LayerNorm/beta/read" + input: "mul_25" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_208" + op: "Assign" + input: "bert/embeddings/LayerNorm/beta" + input: "sub_4" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_209" + op: "Assign" + input: "bert/embeddings/LayerNorm/beta/adam_m" + input: "add_13" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_210" + op: "Assign" + input: "bert/embeddings/LayerNorm/beta/adam_v" + input: "add_14" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: 
"bert/embeddings/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/embeddings/LayerNorm/gamma/adam_m" + input: "bert/embeddings/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/embeddings/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/embeddings/LayerNorm/gamma/adam_v" + input: "bert/embeddings/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/embeddings/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_26/x" + op: "Const" 
+ attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_26" + op: "Mul" + input: "Mul_26/x" + input: "bert/embeddings/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_27/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_27" + op: "Mul" + input: "Mul_27/x" + input: "clip_by_global_norm/clip_by_global_norm/_4" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_16" + op: "Add" + input: "Mul_26" + input: "Mul_27" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_28/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_28" + op: "Mul" + input: "Mul_28/x" + input: "bert/embeddings/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_4" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_4" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_29/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_29" + op: "Mul" + input: "Mul_29/x" + input: "Square_4" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_17" + op: "Add" + input: "Mul_28" + input: "Mul_29" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_4" + op: "Sqrt" + input: "add_17" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_18/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_18" + op: "Add" + input: 
"Sqrt_4" + input: "add_18/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_5" + op: "RealDiv" + input: "add_16" + input: "add_18" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_30" + op: "Mul" + input: "add" + input: "truediv_5" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_5" + op: "Sub" + input: "bert/embeddings/LayerNorm/gamma/read" + input: "mul_30" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_211" + op: "Assign" + input: "bert/embeddings/LayerNorm/gamma" + input: "sub_5" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_212" + op: "Assign" + input: "bert/embeddings/LayerNorm/gamma/adam_m" + input: "add_16" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_213" + op: "Assign" + input: "bert/embeddings/LayerNorm/gamma/adam_v" + input: "add_17" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: 
"bert/encoder/layer_0/attention/self/query/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: 
"bert/encoder/layer_0/attention/self/query/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_31/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_31" + op: "Mul" + input: "Mul_31/x" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_32/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_32" + op: "Mul" + input: "Mul_32/x" + input: "clip_by_global_norm/clip_by_global_norm/_5" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + 
value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_19" + op: "Add" + input: "Mul_31" + input: "Mul_32" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_33/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_33" + op: "Mul" + input: "Mul_33/x" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_5" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_5" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_34/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_34" + op: "Mul" + input: "Mul_34/x" + input: "Square_5" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_20" + op: "Add" + input: "Mul_33" + input: "Mul_34" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_5" + op: "Sqrt" + input: "add_20" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_21/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_21" + op: "Add" + input: "Sqrt_5" + input: "add_21/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_6" + op: "RealDiv" + input: "add_19" + input: "add_21" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_35/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_35" + op: "Mul" + input: "mul_35/x" + input: 
"bert/encoder/layer_0/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_22" + op: "Add" + input: "truediv_6" + input: "mul_35" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_36" + op: "Mul" + input: "add" + input: "add_22" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_6" + op: "Sub" + input: "bert/encoder/layer_0/attention/self/query/kernel/read" + input: "mul_36" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_214" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/kernel" + input: "sub_6" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_215" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_m" + input: "add_19" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_216" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_v" + input: "add_20" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } 
+ attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_0/attention/self/query/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/self/query/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_0/attention/self/query/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/self/query/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_37/x" + op: "Const" 
+ attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_37" + op: "Mul" + input: "Mul_37/x" + input: "bert/encoder/layer_0/attention/self/query/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_38/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_38" + op: "Mul" + input: "Mul_38/x" + input: "clip_by_global_norm/clip_by_global_norm/_6" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_23" + op: "Add" + input: "Mul_37" + input: "Mul_38" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_39/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_39" + op: "Mul" + input: "Mul_39/x" + input: "bert/encoder/layer_0/attention/self/query/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_6" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_6" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_40/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_40" + op: "Mul" + input: "Mul_40/x" + input: "Square_6" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_24" + op: "Add" + input: "Mul_39" + input: "Mul_40" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_6" + op: "Sqrt" + input: "add_24" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_25/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: 
"add_25" + op: "Add" + input: "Sqrt_6" + input: "add_25/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_7" + op: "RealDiv" + input: "add_23" + input: "add_25" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_41" + op: "Mul" + input: "add" + input: "truediv_7" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_7" + op: "Sub" + input: "bert/encoder/layer_0/attention/self/query/bias/read" + input: "mul_41" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_217" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/bias" + input: "sub_7" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_218" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/bias/adam_m" + input: "add_23" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_219" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/bias/adam_v" + input: "add_24" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + 
tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: 
"bert/encoder/layer_0/attention/self/key/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_42/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_42" + op: "Mul" + input: "Mul_42/x" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_43/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_43" + op: "Mul" + input: "Mul_43/x" + input: "clip_by_global_norm/clip_by_global_norm/_7" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + 
dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_26" + op: "Add" + input: "Mul_42" + input: "Mul_43" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_44/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_44" + op: "Mul" + input: "Mul_44/x" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_7" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_7" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_45/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_45" + op: "Mul" + input: "Mul_45/x" + input: "Square_7" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_27" + op: "Add" + input: "Mul_44" + input: "Mul_45" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_7" + op: "Sqrt" + input: "add_27" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_28/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_28" + op: "Add" + input: "Sqrt_7" + input: "add_28/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_8" + op: "RealDiv" + input: "add_26" + input: "add_28" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_46/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_46" + op: "Mul" + input: "mul_46/x" + input: 
"bert/encoder/layer_0/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_29" + op: "Add" + input: "truediv_8" + input: "mul_46" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_47" + op: "Mul" + input: "add" + input: "add_29" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_8" + op: "Sub" + input: "bert/encoder/layer_0/attention/self/key/kernel/read" + input: "mul_47" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_220" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/kernel" + input: "sub_8" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_221" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_m" + input: "add_26" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_222" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_v" + input: "add_27" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: 
"container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_0/attention/self/key/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/self/key/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_0/attention/self/key/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/self/key/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_48/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { 
+ shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_48" + op: "Mul" + input: "Mul_48/x" + input: "bert/encoder/layer_0/attention/self/key/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_49/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_49" + op: "Mul" + input: "Mul_49/x" + input: "clip_by_global_norm/clip_by_global_norm/_8" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_30" + op: "Add" + input: "Mul_48" + input: "Mul_49" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_50/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_50" + op: "Mul" + input: "Mul_50/x" + input: "bert/encoder/layer_0/attention/self/key/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_8" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_8" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_51/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_51" + op: "Mul" + input: "Mul_51/x" + input: "Square_8" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_31" + op: "Add" + input: "Mul_50" + input: "Mul_51" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_8" + op: "Sqrt" + input: "add_31" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_32/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_32" + op: "Add" + input: "Sqrt_8" + input: "add_32/y" + 
attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_9" + op: "RealDiv" + input: "add_30" + input: "add_32" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_52" + op: "Mul" + input: "add" + input: "truediv_9" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_9" + op: "Sub" + input: "bert/encoder/layer_0/attention/self/key/bias/read" + input: "mul_52" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_223" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/bias" + input: "sub_9" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_224" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/bias/adam_m" + input: "add_30" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_225" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/bias/adam_v" + input: "add_31" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: 
"bert/encoder/layer_0/attention/self/value/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: 
"bert/encoder/layer_0/attention/self/value/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_53/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_53" + op: "Mul" + input: "Mul_53/x" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_54/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_54" + op: "Mul" + input: "Mul_54/x" + input: "clip_by_global_norm/clip_by_global_norm/_9" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + 
value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_33" + op: "Add" + input: "Mul_53" + input: "Mul_54" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_55/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_55" + op: "Mul" + input: "Mul_55/x" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_9" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_9" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_56/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_56" + op: "Mul" + input: "Mul_56/x" + input: "Square_9" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_34" + op: "Add" + input: "Mul_55" + input: "Mul_56" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_9" + op: "Sqrt" + input: "add_34" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_35/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_35" + op: "Add" + input: "Sqrt_9" + input: "add_35/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_10" + op: "RealDiv" + input: "add_33" + input: "add_35" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_57/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_57" + op: "Mul" + input: "mul_57/x" + input: 
"bert/encoder/layer_0/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_36" + op: "Add" + input: "truediv_10" + input: "mul_57" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_58" + op: "Mul" + input: "add" + input: "add_36" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_10" + op: "Sub" + input: "bert/encoder/layer_0/attention/self/value/kernel/read" + input: "mul_58" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_226" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/value/kernel" + input: "sub_10" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_227" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_m" + input: "add_33" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_228" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_v" + input: "add_34" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } 
+ } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_0/attention/self/value/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/self/value/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_0/attention/self/value/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/self/value/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_59/x" + op: 
"Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_59" + op: "Mul" + input: "Mul_59/x" + input: "bert/encoder/layer_0/attention/self/value/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_60/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_60" + op: "Mul" + input: "Mul_60/x" + input: "clip_by_global_norm/clip_by_global_norm/_10" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_37" + op: "Add" + input: "Mul_59" + input: "Mul_60" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_61/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_61" + op: "Mul" + input: "Mul_61/x" + input: "bert/encoder/layer_0/attention/self/value/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_10" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_10" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_62/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_62" + op: "Mul" + input: "Mul_62/x" + input: "Square_10" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_38" + op: "Add" + input: "Mul_61" + input: "Mul_62" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_10" + op: "Sqrt" + input: "add_38" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_39/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + 
name: "add_39" + op: "Add" + input: "Sqrt_10" + input: "add_39/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_11" + op: "RealDiv" + input: "add_37" + input: "add_39" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_63" + op: "Mul" + input: "add" + input: "truediv_11" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_11" + op: "Sub" + input: "bert/encoder/layer_0/attention/self/value/bias/read" + input: "mul_63" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_229" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/value/bias" + input: "sub_11" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_230" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/value/bias/adam_m" + input: "add_37" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_231" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/value/bias/adam_v" + input: "add_38" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor 
{ + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: 
DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_64/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_64" + op: "Mul" + input: "Mul_64/x" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_65/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_65" + op: "Mul" + input: "Mul_65/x" + input: 
"clip_by_global_norm/clip_by_global_norm/_11" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_40" + op: "Add" + input: "Mul_64" + input: "Mul_65" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_66/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_66" + op: "Mul" + input: "Mul_66/x" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_11" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_11" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_67/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_67" + op: "Mul" + input: "Mul_67/x" + input: "Square_11" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_41" + op: "Add" + input: "Mul_66" + input: "Mul_67" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_11" + op: "Sqrt" + input: "add_41" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_42/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_42" + op: "Add" + input: "Sqrt_11" + input: "add_42/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_12" + op: "RealDiv" + input: "add_40" + input: "add_42" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_68/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + 
tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_68" + op: "Mul" + input: "mul_68/x" + input: "bert/encoder/layer_0/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_43" + op: "Add" + input: "truediv_12" + input: "mul_68" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_69" + op: "Mul" + input: "add" + input: "add_43" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_12" + op: "Sub" + input: "bert/encoder/layer_0/attention/output/dense/kernel/read" + input: "mul_69" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_232" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/kernel" + input: "sub_12" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_233" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + input: "add_40" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_234" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + input: "add_41" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_0/attention/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_0/attention/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_70/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_70" + op: "Mul" + input: "Mul_70/x" + input: "bert/encoder/layer_0/attention/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_71/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_71" + op: "Mul" + input: "Mul_71/x" + input: "clip_by_global_norm/clip_by_global_norm/_12" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_44" + op: "Add" + input: "Mul_70" + input: "Mul_71" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_72/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_72" + op: "Mul" + input: "Mul_72/x" + input: "bert/encoder/layer_0/attention/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_12" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_12" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_73/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_73" + op: "Mul" + input: "Mul_73/x" + input: "Square_12" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_45" + op: "Add" + input: "Mul_72" + input: "Mul_73" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_12" + op: "Sqrt" + input: "add_45" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_46/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + 
} + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_46" + op: "Add" + input: "Sqrt_12" + input: "add_46/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_13" + op: "RealDiv" + input: "add_44" + input: "add_46" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_74" + op: "Mul" + input: "add" + input: "truediv_13" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_13" + op: "Sub" + input: "bert/encoder/layer_0/attention/output/dense/bias/read" + input: "mul_74" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_235" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/bias" + input: "sub_13" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_236" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/bias/adam_m" + input: "add_44" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_237" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/bias/adam_v" + input: "add_45" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr 
{ + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_75/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_75" + op: "Mul" + input: "Mul_75/x" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_76/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_76" + op: "Mul" + input: "Mul_76/x" + input: "clip_by_global_norm/clip_by_global_norm/_13" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_47" + op: "Add" + input: "Mul_75" + input: "Mul_76" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_77/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_77" + op: "Mul" + input: "Mul_77/x" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_13" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_13" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_78/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_78" + op: "Mul" + input: "Mul_78/x" + input: "Square_13" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_48" + op: "Add" + input: "Mul_77" + input: "Mul_78" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_13" + op: "Sqrt" + input: "add_48" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_49/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { 
+ shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_49" + op: "Add" + input: "Sqrt_13" + input: "add_49/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_14" + op: "RealDiv" + input: "add_47" + input: "add_49" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_79" + op: "Mul" + input: "add" + input: "truediv_14" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_14" + op: "Sub" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/read" + input: "mul_79" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_238" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta" + input: "sub_14" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_239" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + input: "add_47" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_240" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + input: "add_48" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + 
value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_80/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_80" + op: "Mul" + input: "Mul_80/x" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_81/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_81" + op: "Mul" + input: "Mul_81/x" + input: "clip_by_global_norm/clip_by_global_norm/_14" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_50" + op: "Add" + input: "Mul_80" + input: "Mul_81" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_82/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_82" + op: "Mul" + input: "Mul_82/x" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_14" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_14" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_83/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_83" + op: "Mul" + input: "Mul_83/x" + input: "Square_14" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_51" + op: "Add" + input: "Mul_82" + input: "Mul_83" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_14" + op: "Sqrt" + input: "add_51" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { 
+ name: "add_52/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_52" + op: "Add" + input: "Sqrt_14" + input: "add_52/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_15" + op: "RealDiv" + input: "add_50" + input: "add_52" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_84" + op: "Mul" + input: "add" + input: "truediv_15" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_15" + op: "Sub" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/read" + input: "mul_84" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_241" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma" + input: "sub_15" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_242" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + input: "add_50" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_243" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + input: "add_51" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: 
"bert/encoder/layer_0/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + 
op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_85/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_85" + op: "Mul" + input: "Mul_85/x" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_86/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + 
key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_86" + op: "Mul" + input: "Mul_86/x" + input: "clip_by_global_norm/clip_by_global_norm/_15" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_53" + op: "Add" + input: "Mul_85" + input: "Mul_86" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_87/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_87" + op: "Mul" + input: "Mul_87/x" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_15" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_15" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_88/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_88" + op: "Mul" + input: "Mul_88/x" + input: "Square_15" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_54" + op: "Add" + input: "Mul_87" + input: "Mul_88" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_15" + op: "Sqrt" + input: "add_54" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_55/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_55" + op: "Add" + input: "Sqrt_15" + input: "add_55/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_16" + op: "RealDiv" + input: "add_53" + input: "add_55" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} 
+node { + name: "mul_89/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_89" + op: "Mul" + input: "mul_89/x" + input: "bert/encoder/layer_0/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_56" + op: "Add" + input: "truediv_16" + input: "mul_89" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_90" + op: "Mul" + input: "add" + input: "add_56" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_16" + op: "Sub" + input: "bert/encoder/layer_0/intermediate/dense/kernel/read" + input: "mul_90" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_244" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/kernel" + input: "sub_16" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_245" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + input: "add_53" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_246" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + input: "add_54" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + 
tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/bias/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/bias/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_91/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_91" + op: "Mul" + input: "Mul_91/x" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_92/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_92" + op: "Mul" + input: "Mul_92/x" + 
input: "clip_by_global_norm/clip_by_global_norm/_16" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_57" + op: "Add" + input: "Mul_91" + input: "Mul_92" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_93/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_93" + op: "Mul" + input: "Mul_93/x" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_16" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_16" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_94/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_94" + op: "Mul" + input: "Mul_94/x" + input: "Square_16" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_58" + op: "Add" + input: "Mul_93" + input: "Mul_94" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_16" + op: "Sqrt" + input: "add_58" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_59/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_59" + op: "Add" + input: "Sqrt_16" + input: "add_59/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_17" + op: "RealDiv" + input: "add_57" + input: "add_59" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_95" + op: "Mul" + input: "add" + input: "truediv_17" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_17" + op: "Sub" + input: "bert/encoder/layer_0/intermediate/dense/bias/read" + input: "mul_95" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { 
+ shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_247" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/bias" + input: "sub_17" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_248" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_m" + input: "add_57" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_249" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_v" + input: "add_58" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_0/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_0/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { 
+ dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/kernel/adam_m" + input: "bert/encoder/layer_0/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_0/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_0/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_0/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + 
attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/kernel/adam_v" + input: "bert/encoder/layer_0/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_0/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_96/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_96" + op: "Mul" + input: "Mul_96/x" + input: "bert/encoder/layer_0/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_97/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_97" + op: "Mul" + input: "Mul_97/x" + input: "clip_by_global_norm/clip_by_global_norm/_17" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_60" + op: "Add" + input: "Mul_96" + input: "Mul_97" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_98/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_98" + op: "Mul" + input: "Mul_98/x" + input: "bert/encoder/layer_0/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_17" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_17" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { 
+ shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_99/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_99" + op: "Mul" + input: "Mul_99/x" + input: "Square_17" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_61" + op: "Add" + input: "Mul_98" + input: "Mul_99" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_17" + op: "Sqrt" + input: "add_61" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_62/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_62" + op: "Add" + input: "Sqrt_17" + input: "add_62/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_18" + op: "RealDiv" + input: "add_60" + input: "add_62" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_100/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_100" + op: "Mul" + input: "mul_100/x" + input: "bert/encoder/layer_0/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_63" + op: "Add" + input: "truediv_18" + input: "mul_100" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_101" + op: "Mul" + input: "add" + input: "add_63" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_18" + op: "Sub" + input: "bert/encoder/layer_0/output/dense/kernel/read" + input: "mul_101" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_250" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/kernel" + input: "sub_18" + attr { + key: 
"T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_251" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/kernel/adam_m" + input: "add_60" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_252" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/kernel/adam_v" + input: "add_61" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/bias/adam_m" + input: "bert/encoder/layer_0/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_0/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + 
} + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/bias/adam_v" + input: "bert/encoder/layer_0/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_0/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_102/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_102" + op: "Mul" + input: "Mul_102/x" + input: "bert/encoder/layer_0/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_103/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_103" + op: "Mul" + input: "Mul_103/x" + input: "clip_by_global_norm/clip_by_global_norm/_18" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_64" + op: "Add" + input: "Mul_102" + input: "Mul_103" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_104/x" + op: 
"Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_104" + op: "Mul" + input: "Mul_104/x" + input: "bert/encoder/layer_0/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_18" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_18" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_105/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_105" + op: "Mul" + input: "Mul_105/x" + input: "Square_18" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_65" + op: "Add" + input: "Mul_104" + input: "Mul_105" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_18" + op: "Sqrt" + input: "add_65" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_66/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_66" + op: "Add" + input: "Sqrt_18" + input: "add_66/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_19" + op: "RealDiv" + input: "add_64" + input: "add_66" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_106" + op: "Mul" + input: "add" + input: "truediv_19" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_19" + op: "Sub" + input: "bert/encoder/layer_0/output/dense/bias/read" + input: "mul_106" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_253" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/bias" + input: "sub_19" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + 
attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_254" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/bias/adam_m" + input: "add_64" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_255" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/bias/adam_v" + input: "add_65" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + 
key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_107/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_107" + op: "Mul" + input: "Mul_107/x" + input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_108/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_108" + op: "Mul" + input: "Mul_108/x" + input: "clip_by_global_norm/clip_by_global_norm/_19" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_67" + op: "Add" + input: "Mul_107" + input: "Mul_108" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_109/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_109" 
+ op: "Mul" + input: "Mul_109/x" + input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_19" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_19" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_110/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_110" + op: "Mul" + input: "Mul_110/x" + input: "Square_19" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_68" + op: "Add" + input: "Mul_109" + input: "Mul_110" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_19" + op: "Sqrt" + input: "add_68" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_69/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_69" + op: "Add" + input: "Sqrt_19" + input: "add_69/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_20" + op: "RealDiv" + input: "add_67" + input: "add_69" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_111" + op: "Mul" + input: "add" + input: "truediv_20" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_20" + op: "Sub" + input: "bert/encoder/layer_0/output/LayerNorm/beta/read" + input: "mul_111" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_256" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/beta" + input: "sub_20" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_257" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + input: "add_67" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_258" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + input: "add_68" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + 
op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_112/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_112" + op: "Mul" + input: "Mul_112/x" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_113/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_113" + op: "Mul" + input: "Mul_113/x" + input: "clip_by_global_norm/clip_by_global_norm/_20" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_70" + op: "Add" + input: "Mul_112" + input: "Mul_113" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_114/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_114" + op: "Mul" + input: "Mul_114/x" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } 
+ } + } +} +node { + name: "Square_20" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_20" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_115/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_115" + op: "Mul" + input: "Mul_115/x" + input: "Square_20" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_71" + op: "Add" + input: "Mul_114" + input: "Mul_115" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_20" + op: "Sqrt" + input: "add_71" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_72/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_72" + op: "Add" + input: "Sqrt_20" + input: "add_72/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_21" + op: "RealDiv" + input: "add_70" + input: "add_72" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_116" + op: "Mul" + input: "add" + input: "truediv_21" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_21" + op: "Sub" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/read" + input: "mul_116" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_259" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/gamma" + input: "sub_21" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_260" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + input: "add_70" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + 
key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_261" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + input: "add_71" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + 
key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} 
+node { + name: "bert/encoder/layer_1/attention/self/query/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_117/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_117" + op: "Mul" + input: "Mul_117/x" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_118/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_118" + op: "Mul" + input: "Mul_118/x" + input: "clip_by_global_norm/clip_by_global_norm/_21" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_73" + op: "Add" + input: "Mul_117" + input: "Mul_118" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_119/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_119" + op: "Mul" + input: "Mul_119/x" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_21" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_21" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_120/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_120" + op: "Mul" + input: "Mul_120/x" + input: "Square_21" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_74" + op: "Add" + input: "Mul_119" + input: "Mul_120" + attr { + key: "T" + value { + 
type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_21" + op: "Sqrt" + input: "add_74" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_75/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_75" + op: "Add" + input: "Sqrt_21" + input: "add_75/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_22" + op: "RealDiv" + input: "add_73" + input: "add_75" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_121/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_121" + op: "Mul" + input: "mul_121/x" + input: "bert/encoder/layer_1/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_76" + op: "Add" + input: "truediv_22" + input: "mul_121" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_122" + op: "Mul" + input: "add" + input: "add_76" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_22" + op: "Sub" + input: "bert/encoder/layer_1/attention/self/query/kernel/read" + input: "mul_122" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_262" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/kernel" + input: "sub_22" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_263" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_m" + input: "add_73" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_264" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_v" + input: "add_74" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_1/attention/self/query/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/self/query/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: 
"bert/encoder/layer_1/attention/self/query/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_1/attention/self/query/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/self/query/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_123/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_123" + op: "Mul" + input: "Mul_123/x" + input: "bert/encoder/layer_1/attention/self/query/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_124/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_124" + op: "Mul" + input: "Mul_124/x" + input: "clip_by_global_norm/clip_by_global_norm/_22" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_77" + op: "Add" + input: "Mul_123" + input: "Mul_124" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_125/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_125" + op: "Mul" + input: "Mul_125/x" + input: "bert/encoder/layer_1/attention/self/query/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr 
{ + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_22" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_22" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_126/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_126" + op: "Mul" + input: "Mul_126/x" + input: "Square_22" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_78" + op: "Add" + input: "Mul_125" + input: "Mul_126" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_22" + op: "Sqrt" + input: "add_78" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_79/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_79" + op: "Add" + input: "Sqrt_22" + input: "add_79/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_23" + op: "RealDiv" + input: "add_77" + input: "add_79" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_127" + op: "Mul" + input: "add" + input: "truediv_23" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_23" + op: "Sub" + input: "bert/encoder/layer_1/attention/self/query/bias/read" + input: "mul_127" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_265" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/bias" + input: "sub_23" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_266" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/bias/adam_m" + input: "add_77" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + 
size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_267" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/bias/adam_v" + input: "add_78" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } 
+ } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: 
"validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_128/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_128" + op: "Mul" + input: "Mul_128/x" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_129/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_129" + op: "Mul" + input: "Mul_129/x" + input: "clip_by_global_norm/clip_by_global_norm/_23" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_80" + op: "Add" + input: "Mul_128" + input: "Mul_129" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_130/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_130" + op: "Mul" + input: "Mul_130/x" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_23" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_23" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_131/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_131" + op: "Mul" + input: "Mul_131/x" + input: "Square_23" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_81" + op: "Add" + input: "Mul_130" + input: 
"Mul_131" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_23" + op: "Sqrt" + input: "add_81" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_82/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_82" + op: "Add" + input: "Sqrt_23" + input: "add_82/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_24" + op: "RealDiv" + input: "add_80" + input: "add_82" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_132/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_132" + op: "Mul" + input: "mul_132/x" + input: "bert/encoder/layer_1/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_83" + op: "Add" + input: "truediv_24" + input: "mul_132" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_133" + op: "Mul" + input: "add" + input: "add_83" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_24" + op: "Sub" + input: "bert/encoder/layer_1/attention/self/key/kernel/read" + input: "mul_133" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_268" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/kernel" + input: "sub_24" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_269" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_m" + input: "add_80" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_m" + } + } + } + 
attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_270" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_v" + input: "add_81" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_1/attention/self/key/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/self/key/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/bias/adam_v" + op: 
"VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_1/attention/self/key/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/self/key/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_134/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_134" + op: "Mul" + input: "Mul_134/x" + input: "bert/encoder/layer_1/attention/self/key/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_135/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_135" + op: "Mul" + input: "Mul_135/x" + input: "clip_by_global_norm/clip_by_global_norm/_24" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_84" + op: "Add" + input: "Mul_134" + input: "Mul_135" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_136/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_136" + op: "Mul" + input: "Mul_136/x" + input: "bert/encoder/layer_1/attention/self/key/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + 
} + } + } + } +} +node { + name: "Square_24" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_24" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_137/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_137" + op: "Mul" + input: "Mul_137/x" + input: "Square_24" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_85" + op: "Add" + input: "Mul_136" + input: "Mul_137" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_24" + op: "Sqrt" + input: "add_85" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_86/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_86" + op: "Add" + input: "Sqrt_24" + input: "add_86/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_25" + op: "RealDiv" + input: "add_84" + input: "add_86" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_138" + op: "Mul" + input: "add" + input: "truediv_25" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_25" + op: "Sub" + input: "bert/encoder/layer_1/attention/self/key/bias/read" + input: "mul_138" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_271" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/bias" + input: "sub_25" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_272" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/bias/adam_m" + input: "add_84" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + 
attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_273" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/bias/adam_v" + input: "add_85" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } 
+ attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true 
+ } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_139/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_139" + op: "Mul" + input: "Mul_139/x" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_140/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_140" + op: "Mul" + input: "Mul_140/x" + input: "clip_by_global_norm/clip_by_global_norm/_25" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_87" + op: "Add" + input: "Mul_139" + input: "Mul_140" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_141/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_141" + op: "Mul" + input: "Mul_141/x" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_25" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_25" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_142/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_142" + op: "Mul" + input: "Mul_142/x" + input: "Square_25" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_88" + op: "Add" + input: "Mul_141" + input: "Mul_142" + attr { + key: "T" 
+ value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_25" + op: "Sqrt" + input: "add_88" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_89/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_89" + op: "Add" + input: "Sqrt_25" + input: "add_89/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_26" + op: "RealDiv" + input: "add_87" + input: "add_89" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_143/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_143" + op: "Mul" + input: "mul_143/x" + input: "bert/encoder/layer_1/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_90" + op: "Add" + input: "truediv_26" + input: "mul_143" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_144" + op: "Mul" + input: "add" + input: "add_90" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_26" + op: "Sub" + input: "bert/encoder/layer_1/attention/self/value/kernel/read" + input: "mul_144" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_274" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/kernel" + input: "sub_26" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_275" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_m" + input: "add_87" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_276" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_v" + input: "add_88" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_1/attention/self/value/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/self/value/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: 
"bert/encoder/layer_1/attention/self/value/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_1/attention/self/value/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/self/value/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_145/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_145" + op: "Mul" + input: "Mul_145/x" + input: "bert/encoder/layer_1/attention/self/value/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_146/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_146" + op: "Mul" + input: "Mul_146/x" + input: "clip_by_global_norm/clip_by_global_norm/_26" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_91" + op: "Add" + input: "Mul_145" + input: "Mul_146" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_147/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_147" + op: "Mul" + input: "Mul_147/x" + input: "bert/encoder/layer_1/attention/self/value/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr 
{ + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_26" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_26" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_148/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_148" + op: "Mul" + input: "Mul_148/x" + input: "Square_26" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_92" + op: "Add" + input: "Mul_147" + input: "Mul_148" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_26" + op: "Sqrt" + input: "add_92" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_93/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_93" + op: "Add" + input: "Sqrt_26" + input: "add_93/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_27" + op: "RealDiv" + input: "add_91" + input: "add_93" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_149" + op: "Mul" + input: "add" + input: "truediv_27" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_27" + op: "Sub" + input: "bert/encoder/layer_1/attention/self/value/bias/read" + input: "mul_149" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_277" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/bias" + input: "sub_27" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_278" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/bias/adam_m" + input: "add_91" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + 
size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_279" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/bias/adam_v" + input: "add_92" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { 
+ dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 
768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_150/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_150" + op: "Mul" + input: "Mul_150/x" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_151/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_151" + op: "Mul" + input: "Mul_151/x" + input: "clip_by_global_norm/clip_by_global_norm/_27" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_94" + op: "Add" + input: "Mul_150" + input: "Mul_151" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_152/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_152" + op: "Mul" + input: "Mul_152/x" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_27" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_27" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_153/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_153" + op: "Mul" + input: "Mul_153/x" + input: "Square_27" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + 
shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_95" + op: "Add" + input: "Mul_152" + input: "Mul_153" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_27" + op: "Sqrt" + input: "add_95" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_96/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_96" + op: "Add" + input: "Sqrt_27" + input: "add_96/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_28" + op: "RealDiv" + input: "add_94" + input: "add_96" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_154/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_154" + op: "Mul" + input: "mul_154/x" + input: "bert/encoder/layer_1/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_97" + op: "Add" + input: "truediv_28" + input: "mul_154" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_155" + op: "Mul" + input: "add" + input: "add_97" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_28" + op: "Sub" + input: "bert/encoder/layer_1/attention/output/dense/kernel/read" + input: "mul_155" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_280" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/kernel" + input: "sub_28" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_281" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + input: "add_94" + attr { + key: 
"T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_282" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + input: "add_95" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_1/attention/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: 
DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_1/attention/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_156/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_156" + op: "Mul" + input: "Mul_156/x" + input: "bert/encoder/layer_1/attention/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_157/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_157" + op: "Mul" + input: "Mul_157/x" + input: "clip_by_global_norm/clip_by_global_norm/_28" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_98" + op: "Add" + input: "Mul_156" + input: "Mul_157" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_158/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } 
+} +node { + name: "Mul_158" + op: "Mul" + input: "Mul_158/x" + input: "bert/encoder/layer_1/attention/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_28" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_28" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_159/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_159" + op: "Mul" + input: "Mul_159/x" + input: "Square_28" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_99" + op: "Add" + input: "Mul_158" + input: "Mul_159" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_28" + op: "Sqrt" + input: "add_99" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_100/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_100" + op: "Add" + input: "Sqrt_28" + input: "add_100/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_29" + op: "RealDiv" + input: "add_98" + input: "add_100" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_160" + op: "Mul" + input: "add" + input: "truediv_29" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_29" + op: "Sub" + input: "bert/encoder/layer_1/attention/output/dense/bias/read" + input: "mul_160" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_283" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/bias" + input: "sub_29" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_284" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/bias/adam_m" + input: "add_98" + attr { + key: "T" + value { + type: 
DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_285" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/bias/adam_v" + input: "add_99" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + 
key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_161/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_161" + op: "Mul" + input: "Mul_161/x" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_162/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_162" + op: "Mul" + input: "Mul_162/x" + input: "clip_by_global_norm/clip_by_global_norm/_29" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_101" + op: "Add" + input: "Mul_161" + input: "Mul_162" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_163/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 
+ } + } + } +} +node { + name: "Mul_163" + op: "Mul" + input: "Mul_163/x" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_29" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_29" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_164/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_164" + op: "Mul" + input: "Mul_164/x" + input: "Square_29" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_102" + op: "Add" + input: "Mul_163" + input: "Mul_164" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_29" + op: "Sqrt" + input: "add_102" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_103/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_103" + op: "Add" + input: "Sqrt_29" + input: "add_103/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_30" + op: "RealDiv" + input: "add_101" + input: "add_103" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_165" + op: "Mul" + input: "add" + input: "truediv_30" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_30" + op: "Sub" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/read" + input: "mul_165" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_286" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta" + input: "sub_30" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_287" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + input: "add_101" + 
attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_288" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + input: "add_102" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { 
+ key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_166/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_166" + op: "Mul" + input: "Mul_166/x" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_167/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_167" + op: "Mul" + input: "Mul_167/x" + input: "clip_by_global_norm/clip_by_global_norm/_30" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_104" + op: "Add" + input: "Mul_166" + input: "Mul_167" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_168/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { 
+ dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_168" + op: "Mul" + input: "Mul_168/x" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_30" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_30" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_169/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_169" + op: "Mul" + input: "Mul_169/x" + input: "Square_30" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_105" + op: "Add" + input: "Mul_168" + input: "Mul_169" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_30" + op: "Sqrt" + input: "add_105" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_106/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_106" + op: "Add" + input: "Sqrt_30" + input: "add_106/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_31" + op: "RealDiv" + input: "add_104" + input: "add_106" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_170" + op: "Mul" + input: "add" + input: "truediv_31" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_31" + op: "Sub" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/read" + input: "mul_170" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_289" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma" + input: "sub_31" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_290" + op: "Assign" + input: 
"bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + input: "add_104" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_291" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + input: "add_105" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + input: 
"bert/encoder/layer_1/intermediate/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v/Initializer/zeros" + attr 
{ + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_171/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_171" + op: "Mul" + input: "Mul_171/x" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_172/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_172" + op: "Mul" + input: "Mul_172/x" + input: "clip_by_global_norm/clip_by_global_norm/_31" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_107" + op: "Add" + input: "Mul_171" + input: "Mul_172" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_173/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_173" + op: "Mul" + input: "Mul_173/x" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_31" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_31" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_174/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + 
float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_174" + op: "Mul" + input: "Mul_174/x" + input: "Square_31" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_108" + op: "Add" + input: "Mul_173" + input: "Mul_174" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_31" + op: "Sqrt" + input: "add_108" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_109/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_109" + op: "Add" + input: "Sqrt_31" + input: "add_109/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_32" + op: "RealDiv" + input: "add_107" + input: "add_109" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_175/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_175" + op: "Mul" + input: "mul_175/x" + input: "bert/encoder/layer_1/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_110" + op: "Add" + input: "truediv_32" + input: "mul_175" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_176" + op: "Mul" + input: "add" + input: "add_110" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_32" + op: "Sub" + input: "bert/encoder/layer_1/intermediate/dense/kernel/read" + input: "mul_176" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_292" + op: "Assign" + input: "bert/encoder/layer_1/intermediate/dense/kernel" + input: "sub_32" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + 
value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_293" + op: "Assign" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + input: "add_107" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_294" + op: "Assign" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + input: "add_108" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/bias/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/bias/adam_m/Assign" + op: "Assign" + input: 
"bert/encoder/layer_1/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/bias/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_177/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_177" + op: "Mul" + input: "Mul_177/x" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_178/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_178" + op: "Mul" + input: "Mul_178/x" + input: "clip_by_global_norm/clip_by_global_norm/_32" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_111" + op: "Add" + input: "Mul_177" + input: "Mul_178" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_179/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_179" + op: "Mul" + input: "Mul_179/x" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_32" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_32" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_180/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_180" + op: "Mul" + input: "Mul_180/x" + input: "Square_32" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + 
} + } +} +node { + name: "add_112" + op: "Add" + input: "Mul_179" + input: "Mul_180" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_32" + op: "Sqrt" + input: "add_112" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_113/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_113" + op: "Add" + input: "Sqrt_32" + input: "add_113/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_33" + op: "RealDiv" + input: "add_111" + input: "add_113" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_181" + op: "Mul" + input: "add" + input: "truediv_33" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_33" + op: "Sub" + input: "bert/encoder/layer_1/intermediate/dense/bias/read" + input: "mul_181" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_295" + op: "Assign" + input: "bert/encoder/layer_1/intermediate/dense/bias" + input: "sub_33" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_296" + op: "Assign" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_m" + input: "add_111" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_297" + op: "Assign" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_v" + input: "add_112" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_1/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_1/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_1/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/kernel/adam_m" + input: "bert/encoder/layer_1/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_1/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: 
"dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_1/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_1/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/kernel/adam_v" + input: "bert/encoder/layer_1/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_1/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_182/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_182" + op: "Mul" + input: "Mul_182/x" + input: "bert/encoder/layer_1/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value 
{ + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_183/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_183" + op: "Mul" + input: "Mul_183/x" + input: "clip_by_global_norm/clip_by_global_norm/_33" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_114" + op: "Add" + input: "Mul_182" + input: "Mul_183" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_184/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_184" + op: "Mul" + input: "Mul_184/x" + input: "bert/encoder/layer_1/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_33" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_33" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_185/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_185" + op: "Mul" + input: "Mul_185/x" + input: "Square_33" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_115" + op: "Add" + input: "Mul_184" + input: "Mul_185" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_33" + op: "Sqrt" + input: "add_115" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_116/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_116" + op: "Add" + input: "Sqrt_33" + input: "add_116/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_34" + op: 
"RealDiv" + input: "add_114" + input: "add_116" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_186/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_186" + op: "Mul" + input: "mul_186/x" + input: "bert/encoder/layer_1/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_117" + op: "Add" + input: "truediv_34" + input: "mul_186" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_187" + op: "Mul" + input: "add" + input: "add_117" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_34" + op: "Sub" + input: "bert/encoder/layer_1/output/dense/kernel/read" + input: "mul_187" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_298" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/kernel" + input: "sub_34" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_299" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/kernel/adam_m" + input: "add_114" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_300" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/kernel/adam_v" + input: "add_115" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + 
dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/bias/adam_m" + input: "bert/encoder/layer_1/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_1/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/bias/adam_v" + input: "bert/encoder/layer_1/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: 
"bert/encoder/layer_1/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_1/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_188/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_188" + op: "Mul" + input: "Mul_188/x" + input: "bert/encoder/layer_1/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_189/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_189" + op: "Mul" + input: "Mul_189/x" + input: "clip_by_global_norm/clip_by_global_norm/_34" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_118" + op: "Add" + input: "Mul_188" + input: "Mul_189" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_190/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_190" + op: "Mul" + input: "Mul_190/x" + input: "bert/encoder/layer_1/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_34" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_34" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_191/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_191" + op: "Mul" + input: "Mul_191/x" + input: "Square_34" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_119" + op: "Add" + input: "Mul_190" + input: "Mul_191" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_34" + op: "Sqrt" + input: "add_119" + attr { + key: "T" + value { + type: 
DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_120/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_120" + op: "Add" + input: "Sqrt_34" + input: "add_120/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_35" + op: "RealDiv" + input: "add_118" + input: "add_120" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_192" + op: "Mul" + input: "add" + input: "truediv_35" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_35" + op: "Sub" + input: "bert/encoder/layer_1/output/dense/bias/read" + input: "mul_192" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_301" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/bias" + input: "sub_35" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_302" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/bias/adam_m" + input: "add_118" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_303" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/bias/adam_v" + input: "add_119" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + op: "VariableV2" 
+ attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + } + } + 
} + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_193/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_193" + op: "Mul" + input: "Mul_193/x" + input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_194/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_194" + op: "Mul" + input: "Mul_194/x" + input: "clip_by_global_norm/clip_by_global_norm/_35" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_121" + op: "Add" + input: "Mul_193" + input: "Mul_194" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_195/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_195" + op: "Mul" + input: "Mul_195/x" + input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_35" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_35" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_196/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_196" + op: "Mul" + input: "Mul_196/x" + input: "Square_35" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_122" + op: "Add" + input: "Mul_195" + input: "Mul_196" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_35" + op: "Sqrt" + input: "add_122" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_123/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT 
+ } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_123" + op: "Add" + input: "Sqrt_35" + input: "add_123/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_36" + op: "RealDiv" + input: "add_121" + input: "add_123" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_197" + op: "Mul" + input: "add" + input: "truediv_36" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_36" + op: "Sub" + input: "bert/encoder/layer_1/output/LayerNorm/beta/read" + input: "mul_197" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_304" + op: "Assign" + input: "bert/encoder/layer_1/output/LayerNorm/beta" + input: "sub_36" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_305" + op: "Assign" + input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + input: "add_121" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_306" + op: "Assign" + input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + input: "add_122" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + 
s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_198/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + 
key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_198" + op: "Mul" + input: "Mul_198/x" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_199/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_199" + op: "Mul" + input: "Mul_199/x" + input: "clip_by_global_norm/clip_by_global_norm/_36" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_124" + op: "Add" + input: "Mul_198" + input: "Mul_199" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_200/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_200" + op: "Mul" + input: "Mul_200/x" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_36" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_36" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_201/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_201" + op: "Mul" + input: "Mul_201/x" + input: "Square_36" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_125" + op: "Add" + input: "Mul_200" + input: "Mul_201" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_36" + op: "Sqrt" + input: "add_125" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_126/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_126" + op: "Add" + input: "Sqrt_36" + input: "add_126/y" + attr { + key: 
"T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_37" + op: "RealDiv" + input: "add_124" + input: "add_126" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_202" + op: "Mul" + input: "add" + input: "truediv_37" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_37" + op: "Sub" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/read" + input: "mul_202" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_307" + op: "Assign" + input: "bert/encoder/layer_1/output/LayerNorm/gamma" + input: "sub_37" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_308" + op: "Assign" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + input: "add_124" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_309" + op: "Assign" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + input: "add_125" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: 
"bert/encoder/layer_2/attention/self/query/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: 
"bert/encoder/layer_2/attention/self/query/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_203/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_203" + op: "Mul" + input: "Mul_203/x" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_204/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_204" + op: "Mul" + input: "Mul_204/x" + input: "clip_by_global_norm/clip_by_global_norm/_37" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_127" + op: "Add" + input: "Mul_203" + input: "Mul_204" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_205/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_205" + op: "Mul" + input: "Mul_205/x" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_37" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_37" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_206/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_206" + op: "Mul" + input: "Mul_206/x" + input: "Square_37" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_128" + op: "Add" + input: "Mul_205" + input: "Mul_206" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_37" + op: "Sqrt" + input: "add_128" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_129/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_129" + op: "Add" + input: "Sqrt_37" + input: "add_129/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_38" + op: "RealDiv" + input: "add_127" + input: "add_129" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_207/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_207" + op: "Mul" + 
input: "mul_207/x" + input: "bert/encoder/layer_2/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_130" + op: "Add" + input: "truediv_38" + input: "mul_207" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_208" + op: "Mul" + input: "add" + input: "add_130" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_38" + op: "Sub" + input: "bert/encoder/layer_2/attention/self/query/kernel/read" + input: "mul_208" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_310" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/kernel" + input: "sub_38" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_311" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_m" + input: "add_127" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_312" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_v" + input: "add_128" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + 
dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_2/attention/self/query/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/self/query/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_2/attention/self/query/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/self/query/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} 
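Editorial note on the node pattern above and below: the repeated Mul/Add/Square/Sqrt/RealDiv/Sub/Assign chains attached to each variable's adam_m and adam_v slots are an Adam-with-weight-decay update unrolled into graph ops. The constants 0.8999.../0.1000... are beta_1 and (1 - beta_1), 0.9990.../0.0010... are beta_2 and (1 - beta_2), 9.999...e-07 is epsilon = 1e-6, 0.00999... is a 0.01 weight-decay factor, and the scalar node named "add" that feeds the final multiply appears to be the scheduled learning rate. As a reading aid only, here is a minimal NumPy sketch of the per-variable step these nodes encode; the function name and the decay_param flag are illustrative and not code from this repository, and the flag mirrors the fact that the 0.01 * kernel term (mul_186/add_117, mul_207/add_130, mul_218/add_137) appears only for kernel variables, not for bias or LayerNorm parameters.

import numpy as np

def adam_weight_decay_step(param, grad, m, v, lr,
                           beta_1=0.9, beta_2=0.999, eps=1e-6,
                           weight_decay=0.01, decay_param=True):
    # grad corresponds to the clip_by_global_norm/... inputs in the graph.
    # First/second-moment updates, e.g. Mul_188 + Mul_189 -> add_118 and
    # Mul_190 + Mul_191 (on Square_34) -> add_119.
    m = beta_1 * m + (1.0 - beta_1) * grad
    v = beta_2 * v + (1.0 - beta_2) * grad ** 2
    # Normalized update: Sqrt_34, add_120 (+ 1e-6), truediv_35.
    update = m / (np.sqrt(v) + eps)
    # Decoupled weight decay, applied to kernel variables only
    # (mul_186, add_117 and the analogous nodes for layer_2).
    if decay_param:
        update = update + weight_decay * param
    # Scale by the learning rate (the "add" input) and write back:
    # mul_187 / mul_192, sub_34 / sub_35, Assign_298 .. Assign_303.
    param = param - lr * update
    return param, m, v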
+node { + name: "Mul_209/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_209" + op: "Mul" + input: "Mul_209/x" + input: "bert/encoder/layer_2/attention/self/query/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_210/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_210" + op: "Mul" + input: "Mul_210/x" + input: "clip_by_global_norm/clip_by_global_norm/_38" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_131" + op: "Add" + input: "Mul_209" + input: "Mul_210" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_211/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_211" + op: "Mul" + input: "Mul_211/x" + input: "bert/encoder/layer_2/attention/self/query/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_38" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_38" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_212/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_212" + op: "Mul" + input: "Mul_212/x" + input: "Square_38" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_132" + op: "Add" + input: "Mul_211" + input: "Mul_212" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_38" + op: "Sqrt" + input: "add_132" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_133/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + 
float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_133" + op: "Add" + input: "Sqrt_38" + input: "add_133/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_39" + op: "RealDiv" + input: "add_131" + input: "add_133" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_213" + op: "Mul" + input: "add" + input: "truediv_39" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_39" + op: "Sub" + input: "bert/encoder/layer_2/attention/self/query/bias/read" + input: "mul_213" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_313" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/bias" + input: "sub_39" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_314" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/bias/adam_m" + input: "add_131" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_315" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/bias/adam_v" + input: "add_132" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT 
+ } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape 
{ + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_214/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_214" + op: "Mul" + input: "Mul_214/x" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_215/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_215" + op: "Mul" + input: "Mul_215/x" + input: "clip_by_global_norm/clip_by_global_norm/_39" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + 
attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_134" + op: "Add" + input: "Mul_214" + input: "Mul_215" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_216/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_216" + op: "Mul" + input: "Mul_216/x" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_39" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_39" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_217/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_217" + op: "Mul" + input: "Mul_217/x" + input: "Square_39" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_135" + op: "Add" + input: "Mul_216" + input: "Mul_217" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_39" + op: "Sqrt" + input: "add_135" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_136/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_136" + op: "Add" + input: "Sqrt_39" + input: "add_136/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_40" + op: "RealDiv" + input: "add_134" + input: "add_136" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_218/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_218" + 
op: "Mul" + input: "mul_218/x" + input: "bert/encoder/layer_2/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_137" + op: "Add" + input: "truediv_40" + input: "mul_218" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_219" + op: "Mul" + input: "add" + input: "add_137" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_40" + op: "Sub" + input: "bert/encoder/layer_2/attention/self/key/kernel/read" + input: "mul_219" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_316" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/kernel" + input: "sub_40" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_317" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_m" + input: "add_134" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_318" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_v" + input: "add_135" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + 
size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_2/attention/self/key/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/self/key/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_2/attention/self/key/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/self/key/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_220/x" + op: "Const" + 
attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_220" + op: "Mul" + input: "Mul_220/x" + input: "bert/encoder/layer_2/attention/self/key/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_221/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_221" + op: "Mul" + input: "Mul_221/x" + input: "clip_by_global_norm/clip_by_global_norm/_40" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_138" + op: "Add" + input: "Mul_220" + input: "Mul_221" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_222/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_222" + op: "Mul" + input: "Mul_222/x" + input: "bert/encoder/layer_2/attention/self/key/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_40" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_40" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_223/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_223" + op: "Mul" + input: "Mul_223/x" + input: "Square_40" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_139" + op: "Add" + input: "Mul_222" + input: "Mul_223" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_40" + op: "Sqrt" + input: "add_139" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_140/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node 
{ + name: "add_140" + op: "Add" + input: "Sqrt_40" + input: "add_140/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_41" + op: "RealDiv" + input: "add_138" + input: "add_140" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_224" + op: "Mul" + input: "add" + input: "truediv_41" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_41" + op: "Sub" + input: "bert/encoder/layer_2/attention/self/key/bias/read" + input: "mul_224" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_319" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/bias" + input: "sub_41" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_320" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/bias/adam_m" + input: "add_138" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_321" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/bias/adam_v" + input: "add_139" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + 
dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 
0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_225/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_225" + op: "Mul" + input: "Mul_225/x" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_226/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_226" + op: "Mul" + input: "Mul_226/x" + input: "clip_by_global_norm/clip_by_global_norm/_41" + attr { + key: "T" + value { + type: DT_FLOAT + } 
+ } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_141" + op: "Add" + input: "Mul_225" + input: "Mul_226" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_227/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_227" + op: "Mul" + input: "Mul_227/x" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_41" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_41" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_228/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_228" + op: "Mul" + input: "Mul_228/x" + input: "Square_41" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_142" + op: "Add" + input: "Mul_227" + input: "Mul_228" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_41" + op: "Sqrt" + input: "add_142" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_143/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_143" + op: "Add" + input: "Sqrt_41" + input: "add_143/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_42" + op: "RealDiv" + input: "add_141" + input: "add_143" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_229/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: 
"mul_229" + op: "Mul" + input: "mul_229/x" + input: "bert/encoder/layer_2/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_144" + op: "Add" + input: "truediv_42" + input: "mul_229" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_230" + op: "Mul" + input: "add" + input: "add_144" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_42" + op: "Sub" + input: "bert/encoder/layer_2/attention/self/value/kernel/read" + input: "mul_230" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_322" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/kernel" + input: "sub_42" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_323" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_m" + input: "add_141" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_324" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_v" + input: "add_142" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + 
value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_2/attention/self/value/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/self/value/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_2/attention/self/value/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/self/value/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 
768 + } + } + } + } + } +} +node { + name: "Mul_231/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_231" + op: "Mul" + input: "Mul_231/x" + input: "bert/encoder/layer_2/attention/self/value/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_232/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_232" + op: "Mul" + input: "Mul_232/x" + input: "clip_by_global_norm/clip_by_global_norm/_42" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_145" + op: "Add" + input: "Mul_231" + input: "Mul_232" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_233/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_233" + op: "Mul" + input: "Mul_233/x" + input: "bert/encoder/layer_2/attention/self/value/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_42" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_42" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_234/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_234" + op: "Mul" + input: "Mul_234/x" + input: "Square_42" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_146" + op: "Add" + input: "Mul_233" + input: "Mul_234" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_42" + op: "Sqrt" + input: "add_146" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_147/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + 
tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_147" + op: "Add" + input: "Sqrt_42" + input: "add_147/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_43" + op: "RealDiv" + input: "add_145" + input: "add_147" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_235" + op: "Mul" + input: "add" + input: "truediv_43" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_43" + op: "Sub" + input: "bert/encoder/layer_2/attention/self/value/bias/read" + input: "mul_235" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_325" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/bias" + input: "sub_43" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_326" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/bias/adam_m" + input: "add_145" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_327" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/bias/adam_v" + input: "add_146" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + 
key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + 
value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_236/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_236" + op: "Mul" + input: "Mul_236/x" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_237/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { 
+ name: "Mul_237" + op: "Mul" + input: "Mul_237/x" + input: "clip_by_global_norm/clip_by_global_norm/_43" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_148" + op: "Add" + input: "Mul_236" + input: "Mul_237" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_238/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_238" + op: "Mul" + input: "Mul_238/x" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_43" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_43" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_239/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_239" + op: "Mul" + input: "Mul_239/x" + input: "Square_43" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_149" + op: "Add" + input: "Mul_238" + input: "Mul_239" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_43" + op: "Sqrt" + input: "add_149" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_150/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_150" + op: "Add" + input: "Sqrt_43" + input: "add_150/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_44" + op: "RealDiv" + input: "add_148" + input: "add_150" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_240/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } 
+ } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_240" + op: "Mul" + input: "mul_240/x" + input: "bert/encoder/layer_2/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_151" + op: "Add" + input: "truediv_44" + input: "mul_240" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_241" + op: "Mul" + input: "add" + input: "add_151" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_44" + op: "Sub" + input: "bert/encoder/layer_2/attention/output/dense/kernel/read" + input: "mul_241" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_328" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/kernel" + input: "sub_44" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_329" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + input: "add_148" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_330" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + input: "add_149" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/bias/adam_m" + op: 
"VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_2/attention/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_2/attention/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + 
} + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_242/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_242" + op: "Mul" + input: "Mul_242/x" + input: "bert/encoder/layer_2/attention/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_243/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_243" + op: "Mul" + input: "Mul_243/x" + input: "clip_by_global_norm/clip_by_global_norm/_44" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_152" + op: "Add" + input: "Mul_242" + input: "Mul_243" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_244/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_244" + op: "Mul" + input: "Mul_244/x" + input: "bert/encoder/layer_2/attention/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_44" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_44" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_245/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_245" + op: "Mul" + input: "Mul_245/x" + input: "Square_44" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_153" + op: "Add" + input: "Mul_244" + input: "Mul_245" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_44" + op: "Sqrt" + input: "add_153" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_154/y" + 
op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_154" + op: "Add" + input: "Sqrt_44" + input: "add_154/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_45" + op: "RealDiv" + input: "add_152" + input: "add_154" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_246" + op: "Mul" + input: "add" + input: "truediv_45" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_45" + op: "Sub" + input: "bert/encoder/layer_2/attention/output/dense/bias/read" + input: "mul_246" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_331" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/bias" + input: "sub_45" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_332" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/bias/adam_m" + input: "add_152" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_333" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/bias/adam_v" + input: "add_153" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: 
DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_247/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_247" + op: "Mul" + input: "Mul_247/x" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_248/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_248" + op: "Mul" + input: "Mul_248/x" + input: "clip_by_global_norm/clip_by_global_norm/_45" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_155" + op: "Add" + input: "Mul_247" + input: "Mul_248" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_249/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_249" + op: "Mul" + input: "Mul_249/x" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_45" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_45" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_250/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_250" + op: "Mul" + input: "Mul_250/x" + input: "Square_45" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_156" + op: "Add" + input: "Mul_249" + input: "Mul_250" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_45" + op: "Sqrt" + input: "add_156" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { 
+ name: "add_157/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_157" + op: "Add" + input: "Sqrt_45" + input: "add_157/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_46" + op: "RealDiv" + input: "add_155" + input: "add_157" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_251" + op: "Mul" + input: "add" + input: "truediv_46" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_46" + op: "Sub" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/read" + input: "mul_251" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_334" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta" + input: "sub_46" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_335" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + input: "add_155" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_336" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + input: "add_156" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + op: 
"VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: 
"bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_252/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_252" + op: "Mul" + input: "Mul_252/x" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_253/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_253" + op: "Mul" + input: "Mul_253/x" + input: "clip_by_global_norm/clip_by_global_norm/_46" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_158" + op: "Add" + input: "Mul_252" + input: "Mul_253" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_254/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_254" + op: "Mul" + input: "Mul_254/x" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_46" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_46" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_255/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_255" + op: "Mul" + input: "Mul_255/x" + input: "Square_46" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_159" + op: "Add" + input: "Mul_254" + input: "Mul_255" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_46" + op: "Sqrt" + input: "add_159" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { 
+ key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_160/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_160" + op: "Add" + input: "Sqrt_46" + input: "add_160/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_47" + op: "RealDiv" + input: "add_158" + input: "add_160" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_256" + op: "Mul" + input: "add" + input: "truediv_47" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_47" + op: "Sub" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/read" + input: "mul_256" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_337" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma" + input: "sub_47" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_338" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + input: "add_158" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_339" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + input: "add_159" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: 
"\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: 
"bert/encoder/layer_2/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_257/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_257" + op: "Mul" + input: "Mul_257/x" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_258/x" + op: "Const" + attr 
{ + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_258" + op: "Mul" + input: "Mul_258/x" + input: "clip_by_global_norm/clip_by_global_norm/_47" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_161" + op: "Add" + input: "Mul_257" + input: "Mul_258" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_259/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_259" + op: "Mul" + input: "Mul_259/x" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_47" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_47" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_260/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_260" + op: "Mul" + input: "Mul_260/x" + input: "Square_47" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_162" + op: "Add" + input: "Mul_259" + input: "Mul_260" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_47" + op: "Sqrt" + input: "add_162" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_163/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_163" + op: "Add" + input: "Sqrt_47" + input: "add_163/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_48" + op: "RealDiv" + input: "add_161" + input: "add_163" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_261/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_261" + op: "Mul" + input: "mul_261/x" + input: "bert/encoder/layer_2/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_164" + op: "Add" + input: "truediv_48" + input: "mul_261" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_262" + op: "Mul" + input: "add" + input: "add_164" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_48" + op: "Sub" + input: "bert/encoder/layer_2/intermediate/dense/kernel/read" + input: "mul_262" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_340" + op: "Assign" + input: "bert/encoder/layer_2/intermediate/dense/kernel" + input: "sub_48" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_341" + op: "Assign" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + input: "add_161" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_342" + op: "Assign" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + input: "add_162" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { 
+ key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/bias/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: 
"bert/encoder/layer_2/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/bias/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_263/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_263" + op: "Mul" + input: "Mul_263/x" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_264/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { 
+ dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_264" + op: "Mul" + input: "Mul_264/x" + input: "clip_by_global_norm/clip_by_global_norm/_48" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_165" + op: "Add" + input: "Mul_263" + input: "Mul_264" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_265/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_265" + op: "Mul" + input: "Mul_265/x" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_48" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_48" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_266/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_266" + op: "Mul" + input: "Mul_266/x" + input: "Square_48" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_166" + op: "Add" + input: "Mul_265" + input: "Mul_266" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_48" + op: "Sqrt" + input: "add_166" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_167/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_167" + op: "Add" + input: "Sqrt_48" + input: "add_167/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_49" + op: "RealDiv" + input: "add_165" + input: "add_167" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_267" + op: "Mul" + input: "add" + input: "truediv_49" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_49" + op: "Sub" + input: 
"bert/encoder/layer_2/intermediate/dense/bias/read" + input: "mul_267" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_343" + op: "Assign" + input: "bert/encoder/layer_2/intermediate/dense/bias" + input: "sub_49" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_344" + op: "Assign" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_m" + input: "add_165" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_345" + op: "Assign" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_v" + input: "add_166" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_2/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_2/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/adam_m" + op: 
"VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_2/output/dense/kernel/adam_m" + input: "bert/encoder/layer_2/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_2/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_2/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_2/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + 
shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_2/output/dense/kernel/adam_v" + input: "bert/encoder/layer_2/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_2/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_268/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_268" + op: "Mul" + input: "Mul_268/x" + input: "bert/encoder/layer_2/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_269/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_269" + op: "Mul" + input: "Mul_269/x" + input: "clip_by_global_norm/clip_by_global_norm/_49" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_168" + op: "Add" + input: "Mul_268" + input: "Mul_269" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_270/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_270" + op: "Mul" + input: "Mul_270/x" + input: "bert/encoder/layer_2/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node 
{ + name: "Square_49" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_49" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_271/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_271" + op: "Mul" + input: "Mul_271/x" + input: "Square_49" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_169" + op: "Add" + input: "Mul_270" + input: "Mul_271" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_49" + op: "Sqrt" + input: "add_169" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_170/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_170" + op: "Add" + input: "Sqrt_49" + input: "add_170/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_50" + op: "RealDiv" + input: "add_168" + input: "add_170" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_272/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_272" + op: "Mul" + input: "mul_272/x" + input: "bert/encoder/layer_2/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_171" + op: "Add" + input: "truediv_50" + input: "mul_272" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_273" + op: "Mul" + input: "add" + input: "add_171" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_50" + op: "Sub" + input: "bert/encoder/layer_2/output/dense/kernel/read" + input: "mul_273" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list 
{ + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_346" + op: "Assign" + input: "bert/encoder/layer_2/output/dense/kernel" + input: "sub_50" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_347" + op: "Assign" + input: "bert/encoder/layer_2/output/dense/kernel/adam_m" + input: "add_168" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_348" + op: "Assign" + input: "bert/encoder/layer_2/output/dense/kernel/adam_v" + input: "add_169" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_2/output/dense/bias/adam_m" + input: "bert/encoder/layer_2/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_2/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: 
DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_2/output/dense/bias/adam_v" + input: "bert/encoder/layer_2/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_2/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_274/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_274" + op: "Mul" + input: "Mul_274/x" + input: "bert/encoder/layer_2/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_275/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_275" + op: "Mul" + input: "Mul_275/x" + input: "clip_by_global_norm/clip_by_global_norm/_50" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_172" + op: "Add" + input: 
"Mul_274" + input: "Mul_275" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_276/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_276" + op: "Mul" + input: "Mul_276/x" + input: "bert/encoder/layer_2/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_50" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_50" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_277/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_277" + op: "Mul" + input: "Mul_277/x" + input: "Square_50" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_173" + op: "Add" + input: "Mul_276" + input: "Mul_277" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_50" + op: "Sqrt" + input: "add_173" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_174/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_174" + op: "Add" + input: "Sqrt_50" + input: "add_174/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_51" + op: "RealDiv" + input: "add_172" + input: "add_174" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_278" + op: "Mul" + input: "add" + input: "truediv_51" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_51" + op: "Sub" + input: "bert/encoder/layer_2/output/dense/bias/read" + input: "mul_278" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_349" + op: "Assign" + input: "bert/encoder/layer_2/output/dense/bias" + input: "sub_51" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list 
{ + s: "loc:@bert/encoder/layer_2/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_350" + op: "Assign" + input: "bert/encoder/layer_2/output/dense/bias/adam_m" + input: "add_172" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_351" + op: "Assign" + input: "bert/encoder/layer_2/output/dense/bias/adam_v" + input: "add_173" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + 
attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_279/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_279" + op: "Mul" + input: "Mul_279/x" + input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_280/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_280" + op: "Mul" + input: "Mul_280/x" + input: "clip_by_global_norm/clip_by_global_norm/_51" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_175" + op: "Add" + input: "Mul_279" + input: "Mul_280" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_281/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + 
} + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_281" + op: "Mul" + input: "Mul_281/x" + input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_51" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_51" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_282/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_282" + op: "Mul" + input: "Mul_282/x" + input: "Square_51" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_176" + op: "Add" + input: "Mul_281" + input: "Mul_282" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_51" + op: "Sqrt" + input: "add_176" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_177/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_177" + op: "Add" + input: "Sqrt_51" + input: "add_177/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_52" + op: "RealDiv" + input: "add_175" + input: "add_177" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_283" + op: "Mul" + input: "add" + input: "truediv_52" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_52" + op: "Sub" + input: "bert/encoder/layer_2/output/LayerNorm/beta/read" + input: "mul_283" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_352" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/beta" + input: "sub_52" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true 
+ } + } +} +node { + name: "Assign_353" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + input: "add_175" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_354" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + input: "add_176" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value 
{ + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_284/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_284" + op: "Mul" + input: "Mul_284/x" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_285/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_285" + op: "Mul" + input: "Mul_285/x" + input: "clip_by_global_norm/clip_by_global_norm/_52" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_178" + op: "Add" + input: "Mul_284" + input: "Mul_285" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_286/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_286" + op: 
"Mul" + input: "Mul_286/x" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_52" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_52" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_287/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_287" + op: "Mul" + input: "Mul_287/x" + input: "Square_52" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_179" + op: "Add" + input: "Mul_286" + input: "Mul_287" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_52" + op: "Sqrt" + input: "add_179" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_180/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_180" + op: "Add" + input: "Sqrt_52" + input: "add_180/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_53" + op: "RealDiv" + input: "add_178" + input: "add_180" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_288" + op: "Mul" + input: "add" + input: "truediv_53" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_53" + op: "Sub" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/read" + input: "mul_288" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_355" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/gamma" + input: "sub_53" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_356" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + input: "add_178" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list 
{ + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_357" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + input: "add_179" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_289/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_289" + op: "Mul" + input: "Mul_289/x" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_290/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_290" + op: "Mul" + input: "Mul_290/x" + input: "clip_by_global_norm/clip_by_global_norm/_53" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_181" + op: "Add" + input: "Mul_289" + input: "Mul_290" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_291/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_291" + op: "Mul" + input: "Mul_291/x" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_53" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_53" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_292/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_292" + op: "Mul" + input: 
"Mul_292/x" + input: "Square_53" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_182" + op: "Add" + input: "Mul_291" + input: "Mul_292" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_53" + op: "Sqrt" + input: "add_182" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_183/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_183" + op: "Add" + input: "Sqrt_53" + input: "add_183/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_54" + op: "RealDiv" + input: "add_181" + input: "add_183" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_293/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_293" + op: "Mul" + input: "mul_293/x" + input: "bert/encoder/layer_3/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_184" + op: "Add" + input: "truediv_54" + input: "mul_293" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_294" + op: "Mul" + input: "add" + input: "add_184" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_54" + op: "Sub" + input: "bert/encoder/layer_3/attention/self/query/kernel/read" + input: "mul_294" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_358" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/query/kernel" + input: "sub_54" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node 
{ + name: "Assign_359" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_m" + input: "add_181" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_360" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_v" + input: "add_182" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_3/attention/self/query/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/self/query/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value 
{ + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_3/attention/self/query/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/self/query/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_295/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_295" + op: "Mul" + input: "Mul_295/x" + input: "bert/encoder/layer_3/attention/self/query/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_296/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_296" + op: "Mul" + input: "Mul_296/x" + input: "clip_by_global_norm/clip_by_global_norm/_54" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_185" + op: "Add" + input: "Mul_295" + input: "Mul_296" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_297/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { 
+ dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_297" + op: "Mul" + input: "Mul_297/x" + input: "bert/encoder/layer_3/attention/self/query/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_54" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_54" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_298/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_298" + op: "Mul" + input: "Mul_298/x" + input: "Square_54" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_186" + op: "Add" + input: "Mul_297" + input: "Mul_298" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_54" + op: "Sqrt" + input: "add_186" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_187/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_187" + op: "Add" + input: "Sqrt_54" + input: "add_187/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_55" + op: "RealDiv" + input: "add_185" + input: "add_187" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_299" + op: "Mul" + input: "add" + input: "truediv_55" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_55" + op: "Sub" + input: "bert/encoder/layer_3/attention/self/query/bias/read" + input: "mul_299" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_361" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/query/bias" + input: "sub_55" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_362" + op: "Assign" + input: 
"bert/encoder/layer_3/attention/self/query/bias/adam_m" + input: "add_185" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_363" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/query/bias/adam_v" + input: "add_186" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_m" + input: 
"bert/encoder/layer_3/attention/self/key/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_v/Initializer/zeros" + attr { + 
key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_300/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_300" + op: "Mul" + input: "Mul_300/x" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_301/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_301" + op: "Mul" + input: "Mul_301/x" + input: "clip_by_global_norm/clip_by_global_norm/_55" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_188" + op: "Add" + input: "Mul_300" + input: "Mul_301" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_302/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_302" + op: "Mul" + input: "Mul_302/x" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_55" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_55" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_303/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 
0.0010000000474974513 + } + } + } +} +node { + name: "Mul_303" + op: "Mul" + input: "Mul_303/x" + input: "Square_55" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_189" + op: "Add" + input: "Mul_302" + input: "Mul_303" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_55" + op: "Sqrt" + input: "add_189" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_190/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_190" + op: "Add" + input: "Sqrt_55" + input: "add_190/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_56" + op: "RealDiv" + input: "add_188" + input: "add_190" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_304/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_304" + op: "Mul" + input: "mul_304/x" + input: "bert/encoder/layer_3/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_191" + op: "Add" + input: "truediv_56" + input: "mul_304" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_305" + op: "Mul" + input: "add" + input: "add_191" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_56" + op: "Sub" + input: "bert/encoder/layer_3/attention/self/key/kernel/read" + input: "mul_305" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_364" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/key/kernel" + input: "sub_56" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + 
} + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_365" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_m" + input: "add_188" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_366" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_v" + input: "add_189" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_3/attention/self/key/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/self/key/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_v" + } + } + 
} + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_3/attention/self/key/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/self/key/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_306/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_306" + op: "Mul" + input: "Mul_306/x" + input: "bert/encoder/layer_3/attention/self/key/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_307/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_307" + op: "Mul" + input: "Mul_307/x" + input: "clip_by_global_norm/clip_by_global_norm/_56" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_192" + op: "Add" + input: "Mul_306" + input: "Mul_307" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_308/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: 
"value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_308" + op: "Mul" + input: "Mul_308/x" + input: "bert/encoder/layer_3/attention/self/key/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_56" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_56" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_309/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_309" + op: "Mul" + input: "Mul_309/x" + input: "Square_56" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_193" + op: "Add" + input: "Mul_308" + input: "Mul_309" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_56" + op: "Sqrt" + input: "add_193" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_194/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_194" + op: "Add" + input: "Sqrt_56" + input: "add_194/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_57" + op: "RealDiv" + input: "add_192" + input: "add_194" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_310" + op: "Mul" + input: "add" + input: "truediv_57" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_57" + op: "Sub" + input: "bert/encoder/layer_3/attention/self/key/bias/read" + input: "mul_310" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_367" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/key/bias" + input: "sub_57" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_368" + op: "Assign" + input: 
"bert/encoder/layer_3/attention/self/key/bias/adam_m" + input: "add_192" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_369" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/key/bias/adam_v" + input: "add_193" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_m" + input: 
"bert/encoder/layer_3/attention/self/value/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_v" + input: 
"bert/encoder/layer_3/attention/self/value/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_311/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_311" + op: "Mul" + input: "Mul_311/x" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_312/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_312" + op: "Mul" + input: "Mul_312/x" + input: "clip_by_global_norm/clip_by_global_norm/_57" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_195" + op: "Add" + input: "Mul_311" + input: "Mul_312" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_313/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_313" + op: "Mul" + input: "Mul_313/x" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_57" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_57" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_314/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } 
+ attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_314" + op: "Mul" + input: "Mul_314/x" + input: "Square_57" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_196" + op: "Add" + input: "Mul_313" + input: "Mul_314" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_57" + op: "Sqrt" + input: "add_196" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_197/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_197" + op: "Add" + input: "Sqrt_57" + input: "add_197/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_58" + op: "RealDiv" + input: "add_195" + input: "add_197" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_315/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_315" + op: "Mul" + input: "mul_315/x" + input: "bert/encoder/layer_3/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_198" + op: "Add" + input: "truediv_58" + input: "mul_315" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_316" + op: "Mul" + input: "add" + input: "add_198" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_58" + op: "Sub" + input: "bert/encoder/layer_3/attention/self/value/kernel/read" + input: "mul_316" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_370" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/value/kernel" + input: "sub_58" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + 
size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_371" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_m" + input: "add_195" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_372" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_v" + input: "add_196" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_3/attention/self/value/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/self/value/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: 
"bert/encoder/layer_3/attention/self/value/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_3/attention/self/value/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/self/value/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_317/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_317" + op: "Mul" + input: "Mul_317/x" + input: "bert/encoder/layer_3/attention/self/value/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_318/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_318" + op: "Mul" + input: "Mul_318/x" + input: "clip_by_global_norm/clip_by_global_norm/_58" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_199" + op: "Add" + input: "Mul_317" + input: "Mul_318" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 
768 + } + } + } + } + } +} +node { + name: "Mul_319/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_319" + op: "Mul" + input: "Mul_319/x" + input: "bert/encoder/layer_3/attention/self/value/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_58" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_58" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_320/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_320" + op: "Mul" + input: "Mul_320/x" + input: "Square_58" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_200" + op: "Add" + input: "Mul_319" + input: "Mul_320" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_58" + op: "Sqrt" + input: "add_200" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_201/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_201" + op: "Add" + input: "Sqrt_58" + input: "add_201/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_59" + op: "RealDiv" + input: "add_199" + input: "add_201" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_321" + op: "Mul" + input: "add" + input: "truediv_59" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_59" + op: "Sub" + input: "bert/encoder/layer_3/attention/self/value/bias/read" + input: "mul_321" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_373" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/value/bias" + input: "sub_59" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + 
dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_374" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/value/bias/adam_m" + input: "add_199" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_375" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/value/bias/adam_v" + input: "add_200" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: 
"shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + 
value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_322/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_322" + op: "Mul" + input: "Mul_322/x" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_323/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_323" + op: "Mul" + input: "Mul_323/x" + input: "clip_by_global_norm/clip_by_global_norm/_59" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_202" + op: "Add" + input: "Mul_322" + input: "Mul_323" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_324/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_324" + op: "Mul" + input: "Mul_324/x" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_59" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_59" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim 
{ + size: 768 + } + } + } + } + } +} +node { + name: "Mul_325/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_325" + op: "Mul" + input: "Mul_325/x" + input: "Square_59" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_203" + op: "Add" + input: "Mul_324" + input: "Mul_325" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_59" + op: "Sqrt" + input: "add_203" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_204/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_204" + op: "Add" + input: "Sqrt_59" + input: "add_204/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_60" + op: "RealDiv" + input: "add_202" + input: "add_204" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_326/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_326" + op: "Mul" + input: "mul_326/x" + input: "bert/encoder/layer_3/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_205" + op: "Add" + input: "truediv_60" + input: "mul_326" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_327" + op: "Mul" + input: "add" + input: "add_205" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_60" + op: "Sub" + input: "bert/encoder/layer_3/attention/output/dense/kernel/read" + input: "mul_327" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_376" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/kernel" + input: "sub_60" + attr { + key: "T" + 
value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_377" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + input: "add_202" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_378" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + input: "add_203" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_3/attention/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_3/attention/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_328/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_328" + op: "Mul" + input: "Mul_328/x" + input: "bert/encoder/layer_3/attention/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_329/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_329" + op: "Mul" + input: "Mul_329/x" + input: "clip_by_global_norm/clip_by_global_norm/_60" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } 
+ } + } +} +node { + name: "add_206" + op: "Add" + input: "Mul_328" + input: "Mul_329" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_330/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_330" + op: "Mul" + input: "Mul_330/x" + input: "bert/encoder/layer_3/attention/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_60" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_60" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_331/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_331" + op: "Mul" + input: "Mul_331/x" + input: "Square_60" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_207" + op: "Add" + input: "Mul_330" + input: "Mul_331" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_60" + op: "Sqrt" + input: "add_207" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_208/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_208" + op: "Add" + input: "Sqrt_60" + input: "add_208/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_61" + op: "RealDiv" + input: "add_206" + input: "add_208" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_332" + op: "Mul" + input: "add" + input: "truediv_61" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_61" + op: "Sub" + input: "bert/encoder/layer_3/attention/output/dense/bias/read" + input: "mul_332" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_379" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/bias" + input: "sub_61" + attr { 
+ key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_380" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/bias/adam_m" + input: "add_206" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_381" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/bias/adam_v" + input: "add_207" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_333/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_333" + op: "Mul" + input: "Mul_333/x" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_334/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_334" + op: "Mul" + input: "Mul_334/x" + input: "clip_by_global_norm/clip_by_global_norm/_61" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value 
{ + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_209" + op: "Add" + input: "Mul_333" + input: "Mul_334" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_335/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_335" + op: "Mul" + input: "Mul_335/x" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_61" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_61" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_336/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_336" + op: "Mul" + input: "Mul_336/x" + input: "Square_61" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_210" + op: "Add" + input: "Mul_335" + input: "Mul_336" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_61" + op: "Sqrt" + input: "add_210" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_211/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_211" + op: "Add" + input: "Sqrt_61" + input: "add_211/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_62" + op: "RealDiv" + input: "add_209" + input: "add_211" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_337" + op: "Mul" + input: "add" + input: "truediv_62" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_62" + op: "Sub" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/read" + input: "mul_337" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_382" + op: "Assign" + input: 
"bert/encoder/layer_3/attention/output/LayerNorm/beta" + input: "sub_62" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_383" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + input: "add_209" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_384" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + input: "add_210" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { 
+ type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_338/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_338" + op: "Mul" + input: "Mul_338/x" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_339/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_339" + op: "Mul" + input: "Mul_339/x" + input: "clip_by_global_norm/clip_by_global_norm/_62" + attr { 
+ key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_212" + op: "Add" + input: "Mul_338" + input: "Mul_339" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_340/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_340" + op: "Mul" + input: "Mul_340/x" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_62" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_62" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_341/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_341" + op: "Mul" + input: "Mul_341/x" + input: "Square_62" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_213" + op: "Add" + input: "Mul_340" + input: "Mul_341" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_62" + op: "Sqrt" + input: "add_213" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_214/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_214" + op: "Add" + input: "Sqrt_62" + input: "add_214/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_63" + op: "RealDiv" + input: "add_212" + input: "add_214" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_342" + op: "Mul" + input: "add" + input: "truediv_63" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_63" + op: "Sub" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/read" + input: "mul_342" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } 
+ } + } + } + } +} +node { + name: "Assign_385" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma" + input: "sub_63" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_386" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + input: "add_212" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_387" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + input: "add_213" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + } + } + } + attr { 
+ key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_343/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_343" + op: "Mul" + input: "Mul_343/x" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_344/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_344" + op: "Mul" + input: "Mul_344/x" + input: "clip_by_global_norm/clip_by_global_norm/_63" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_215" + op: "Add" + input: "Mul_343" + input: "Mul_344" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_345/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_345" + op: "Mul" + input: "Mul_345/x" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + 
value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_63" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_63" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_346/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_346" + op: "Mul" + input: "Mul_346/x" + input: "Square_63" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_216" + op: "Add" + input: "Mul_345" + input: "Mul_346" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_63" + op: "Sqrt" + input: "add_216" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_217/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_217" + op: "Add" + input: "Sqrt_63" + input: "add_217/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_64" + op: "RealDiv" + input: "add_215" + input: "add_217" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_347/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_347" + op: "Mul" + input: "mul_347/x" + input: "bert/encoder/layer_3/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_218" + op: "Add" + input: "truediv_64" + input: "mul_347" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_348" + op: "Mul" + input: "add" + input: "add_218" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_64" + op: "Sub" + input: "bert/encoder/layer_3/intermediate/dense/kernel/read" + input: 
"mul_348" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_388" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/kernel" + input: "sub_64" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_389" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + input: "add_215" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_390" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + input: "add_216" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/bias/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: 
"bert/encoder/layer_3/intermediate/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/bias/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 
3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_349/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_349" + op: "Mul" + input: "Mul_349/x" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_350/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_350" + op: "Mul" + input: "Mul_350/x" + input: "clip_by_global_norm/clip_by_global_norm/_64" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_219" + op: "Add" + input: "Mul_349" + input: "Mul_350" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_351/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_351" + op: "Mul" + input: "Mul_351/x" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_64" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_64" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" 
+ value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_352/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_352" + op: "Mul" + input: "Mul_352/x" + input: "Square_64" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_220" + op: "Add" + input: "Mul_351" + input: "Mul_352" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_64" + op: "Sqrt" + input: "add_220" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_221/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_221" + op: "Add" + input: "Sqrt_64" + input: "add_221/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_65" + op: "RealDiv" + input: "add_219" + input: "add_221" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_353" + op: "Mul" + input: "add" + input: "truediv_65" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_65" + op: "Sub" + input: "bert/encoder/layer_3/intermediate/dense/bias/read" + input: "mul_353" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_391" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/bias" + input: "sub_65" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_392" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_m" + input: "add_219" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_393" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_v" + input: 
"add_220" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_3/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_3/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_3/output/dense/kernel/adam_m" + input: "bert/encoder/layer_3/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_3/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: 
"_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_3/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_3/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_3/output/dense/kernel/adam_v" + input: "bert/encoder/layer_3/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_3/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 
+ } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_354/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_354" + op: "Mul" + input: "Mul_354/x" + input: "bert/encoder/layer_3/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_355/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_355" + op: "Mul" + input: "Mul_355/x" + input: "clip_by_global_norm/clip_by_global_norm/_65" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_222" + op: "Add" + input: "Mul_354" + input: "Mul_355" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_356/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_356" + op: "Mul" + input: "Mul_356/x" + input: "bert/encoder/layer_3/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_65" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_65" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_357/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_357" + op: "Mul" + input: "Mul_357/x" + input: "Square_65" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_223" + op: "Add" + input: "Mul_356" + input: "Mul_357" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_65" + op: "Sqrt" + input: "add_223" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_224/y" + op: 
"Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_224" + op: "Add" + input: "Sqrt_65" + input: "add_224/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_66" + op: "RealDiv" + input: "add_222" + input: "add_224" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_358/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_358" + op: "Mul" + input: "mul_358/x" + input: "bert/encoder/layer_3/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_225" + op: "Add" + input: "truediv_66" + input: "mul_358" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_359" + op: "Mul" + input: "add" + input: "add_225" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_66" + op: "Sub" + input: "bert/encoder/layer_3/output/dense/kernel/read" + input: "mul_359" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_394" + op: "Assign" + input: "bert/encoder/layer_3/output/dense/kernel" + input: "sub_66" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_395" + op: "Assign" + input: "bert/encoder/layer_3/output/dense/kernel/adam_m" + input: "add_222" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_396" + op: "Assign" + input: "bert/encoder/layer_3/output/dense/kernel/adam_v" + input: "add_223" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_3/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_3/output/dense/bias/adam_m" + input: "bert/encoder/layer_3/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_3/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/bias/adam_v/Assign" + op: "Assign" 
+ input: "bert/encoder/layer_3/output/dense/bias/adam_v" + input: "bert/encoder/layer_3/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_3/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_360/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_360" + op: "Mul" + input: "Mul_360/x" + input: "bert/encoder/layer_3/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_361/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_361" + op: "Mul" + input: "Mul_361/x" + input: "clip_by_global_norm/clip_by_global_norm/_66" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_226" + op: "Add" + input: "Mul_360" + input: "Mul_361" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_362/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_362" + op: "Mul" + input: "Mul_362/x" + input: "bert/encoder/layer_3/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_66" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_66" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_363/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_363" + op: "Mul" 
+ input: "Mul_363/x" + input: "Square_66" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_227" + op: "Add" + input: "Mul_362" + input: "Mul_363" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_66" + op: "Sqrt" + input: "add_227" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_228/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_228" + op: "Add" + input: "Sqrt_66" + input: "add_228/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_67" + op: "RealDiv" + input: "add_226" + input: "add_228" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_364" + op: "Mul" + input: "add" + input: "truediv_67" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_67" + op: "Sub" + input: "bert/encoder/layer_3/output/dense/bias/read" + input: "mul_364" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_397" + op: "Assign" + input: "bert/encoder/layer_3/output/dense/bias" + input: "sub_67" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_398" + op: "Assign" + input: "bert/encoder/layer_3/output/dense/bias/adam_m" + input: "add_226" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_399" + op: "Assign" + input: "bert/encoder/layer_3/output/dense/bias/adam_v" + input: "add_227" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: 
"bert/encoder/layer_3/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_365/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_365" + op: "Mul" + input: "Mul_365/x" + input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_366/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_366" + op: "Mul" + input: "Mul_366/x" + input: "clip_by_global_norm/clip_by_global_norm/_67" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_229" + op: "Add" + input: "Mul_365" + input: "Mul_366" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_367/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_367" + op: "Mul" + input: "Mul_367/x" + input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_67" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_67" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_368/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_368" + op: "Mul" + input: "Mul_368/x" + input: "Square_67" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + 
name: "add_230" + op: "Add" + input: "Mul_367" + input: "Mul_368" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_67" + op: "Sqrt" + input: "add_230" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_231/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_231" + op: "Add" + input: "Sqrt_67" + input: "add_231/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_68" + op: "RealDiv" + input: "add_229" + input: "add_231" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_369" + op: "Mul" + input: "add" + input: "truediv_68" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_68" + op: "Sub" + input: "bert/encoder/layer_3/output/LayerNorm/beta/read" + input: "mul_369" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_400" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/beta" + input: "sub_68" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_401" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + input: "add_229" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_402" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + input: "add_230" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + 
value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: 
"validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_370/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_370" + op: "Mul" + input: "Mul_370/x" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_371/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_371" + op: "Mul" + input: "Mul_371/x" + input: "clip_by_global_norm/clip_by_global_norm/_68" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_232" + op: "Add" + input: "Mul_370" + input: "Mul_371" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_372/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_372" + op: "Mul" + input: "Mul_372/x" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_68" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_68" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_373/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_373" + op: "Mul" + input: "Mul_373/x" + input: "Square_68" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_233" + op: "Add" + input: "Mul_372" + input: "Mul_373" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + 
name: "Sqrt_68" + op: "Sqrt" + input: "add_233" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_234/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_234" + op: "Add" + input: "Sqrt_68" + input: "add_234/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_69" + op: "RealDiv" + input: "add_232" + input: "add_234" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_374" + op: "Mul" + input: "add" + input: "truediv_69" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_69" + op: "Sub" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/read" + input: "mul_374" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_403" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/gamma" + input: "sub_69" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_404" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + input: "add_232" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_405" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + input: "add_233" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { 
+ dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: 
"\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_375/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_375" + op: "Mul" + input: "Mul_375/x" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } 
+ dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_376/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_376" + op: "Mul" + input: "Mul_376/x" + input: "clip_by_global_norm/clip_by_global_norm/_69" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_235" + op: "Add" + input: "Mul_375" + input: "Mul_376" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_377/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_377" + op: "Mul" + input: "Mul_377/x" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_69" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_69" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_378/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_378" + op: "Mul" + input: "Mul_378/x" + input: "Square_69" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_236" + op: "Add" + input: "Mul_377" + input: "Mul_378" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_69" + op: "Sqrt" + input: "add_236" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_237/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_237" + op: "Add" + input: "Sqrt_69" + input: "add_237/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_70" + op: "RealDiv" + input: "add_235" + input: 
"add_237" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_379/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_379" + op: "Mul" + input: "mul_379/x" + input: "bert/encoder/layer_4/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_238" + op: "Add" + input: "truediv_70" + input: "mul_379" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_380" + op: "Mul" + input: "add" + input: "add_238" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_70" + op: "Sub" + input: "bert/encoder/layer_4/attention/self/query/kernel/read" + input: "mul_380" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_406" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/kernel" + input: "sub_70" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_407" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_m" + input: "add_235" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_408" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_v" + input: "add_236" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_m" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_4/attention/self/query/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/self/query/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_4/attention/self/query/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + 
attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/self/query/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_381/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_381" + op: "Mul" + input: "Mul_381/x" + input: "bert/encoder/layer_4/attention/self/query/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_382/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_382" + op: "Mul" + input: "Mul_382/x" + input: "clip_by_global_norm/clip_by_global_norm/_70" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_239" + op: "Add" + input: "Mul_381" + input: "Mul_382" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_383/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_383" + op: "Mul" + input: "Mul_383/x" + input: "bert/encoder/layer_4/attention/self/query/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_70" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_70" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_384/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_384" + op: "Mul" + input: "Mul_384/x" + input: "Square_70" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_240" + op: "Add" + input: "Mul_383" + input: "Mul_384" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" 
+ value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_70" + op: "Sqrt" + input: "add_240" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_241/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_241" + op: "Add" + input: "Sqrt_70" + input: "add_241/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_71" + op: "RealDiv" + input: "add_239" + input: "add_241" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_385" + op: "Mul" + input: "add" + input: "truediv_71" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_71" + op: "Sub" + input: "bert/encoder/layer_4/attention/self/query/bias/read" + input: "mul_385" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_409" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/bias" + input: "sub_71" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_410" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/bias/adam_m" + input: "add_239" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_411" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/bias/adam_v" + input: "add_240" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + 
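The bias path just completed (Mul_381 through Assign_409/410/411) follows the same moment updates but has no 0.01-scaled term: mul_385 multiplies the learning rate directly by truediv_71, consistent with BERT's convention of excluding bias and LayerNorm parameters from weight decay. A minimal sketch of that variant, with illustrative names:

def adam_bias_step(bias, m, v, lr, eps=1e-6):
    # truediv_71: no "+ weight_decay * bias" term on this path.
    update = m / (v ** 0.5 + eps)
    # mul_385, sub_71 -> Assign_409.
    return bias - lr * update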
type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 
+ tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_386/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_386" + op: "Mul" + input: "Mul_386/x" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + 
shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_387/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_387" + op: "Mul" + input: "Mul_387/x" + input: "clip_by_global_norm/clip_by_global_norm/_71" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_242" + op: "Add" + input: "Mul_386" + input: "Mul_387" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_388/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_388" + op: "Mul" + input: "Mul_388/x" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_71" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_71" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_389/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_389" + op: "Mul" + input: "Mul_389/x" + input: "Square_71" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_243" + op: "Add" + input: "Mul_388" + input: "Mul_389" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_71" + op: "Sqrt" + input: "add_243" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_244/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_244" + op: "Add" + input: "Sqrt_71" + input: "add_244/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_72" + op: "RealDiv" + 
input: "add_242" + input: "add_244" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_390/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_390" + op: "Mul" + input: "mul_390/x" + input: "bert/encoder/layer_4/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_245" + op: "Add" + input: "truediv_72" + input: "mul_390" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_391" + op: "Mul" + input: "add" + input: "add_245" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_72" + op: "Sub" + input: "bert/encoder/layer_4/attention/self/key/kernel/read" + input: "mul_391" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_412" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/kernel" + input: "sub_72" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_413" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_m" + input: "add_242" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_414" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_v" + input: "add_243" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_m" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_4/attention/self/key/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/self/key/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_4/attention/self/key/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value 
{ + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/self/key/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_392/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_392" + op: "Mul" + input: "Mul_392/x" + input: "bert/encoder/layer_4/attention/self/key/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_393/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_393" + op: "Mul" + input: "Mul_393/x" + input: "clip_by_global_norm/clip_by_global_norm/_72" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_246" + op: "Add" + input: "Mul_392" + input: "Mul_393" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_394/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_394" + op: "Mul" + input: "Mul_394/x" + input: "bert/encoder/layer_4/attention/self/key/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_72" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_72" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_395/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_395" + op: "Mul" + input: "Mul_395/x" + input: "Square_72" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_247" + op: "Add" + input: "Mul_394" + input: "Mul_395" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 
768 + } + } + } + } + } +} +node { + name: "Sqrt_72" + op: "Sqrt" + input: "add_247" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_248/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_248" + op: "Add" + input: "Sqrt_72" + input: "add_248/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_73" + op: "RealDiv" + input: "add_246" + input: "add_248" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_396" + op: "Mul" + input: "add" + input: "truediv_73" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_73" + op: "Sub" + input: "bert/encoder/layer_4/attention/self/key/bias/read" + input: "mul_396" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_415" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/bias" + input: "sub_73" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_416" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/bias/adam_m" + input: "add_246" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_417" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/bias/adam_v" + input: "add_247" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + 
tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { 
+ size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_397/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_397" + op: "Mul" + input: "Mul_397/x" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list 
{ + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_398/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_398" + op: "Mul" + input: "Mul_398/x" + input: "clip_by_global_norm/clip_by_global_norm/_73" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_249" + op: "Add" + input: "Mul_397" + input: "Mul_398" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_399/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_399" + op: "Mul" + input: "Mul_399/x" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_73" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_73" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_400/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_400" + op: "Mul" + input: "Mul_400/x" + input: "Square_73" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_250" + op: "Add" + input: "Mul_399" + input: "Mul_400" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_73" + op: "Sqrt" + input: "add_250" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_251/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_251" + op: "Add" + input: "Sqrt_73" + input: "add_251/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_74" + op: "RealDiv" 
+ input: "add_249" + input: "add_251" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_401/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_401" + op: "Mul" + input: "mul_401/x" + input: "bert/encoder/layer_4/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_252" + op: "Add" + input: "truediv_74" + input: "mul_401" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_402" + op: "Mul" + input: "add" + input: "add_252" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_74" + op: "Sub" + input: "bert/encoder/layer_4/attention/self/value/kernel/read" + input: "mul_402" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_418" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/kernel" + input: "sub_74" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_419" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_m" + input: "add_249" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_420" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_v" + input: "add_250" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_m" + } + } + } + 
attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_4/attention/self/value/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/self/value/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_4/attention/self/value/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + 
} + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/self/value/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_403/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_403" + op: "Mul" + input: "Mul_403/x" + input: "bert/encoder/layer_4/attention/self/value/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_404/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_404" + op: "Mul" + input: "Mul_404/x" + input: "clip_by_global_norm/clip_by_global_norm/_74" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_253" + op: "Add" + input: "Mul_403" + input: "Mul_404" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_405/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_405" + op: "Mul" + input: "Mul_405/x" + input: "bert/encoder/layer_4/attention/self/value/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_74" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_74" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_406/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_406" + op: "Mul" + input: "Mul_406/x" + input: "Square_74" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_254" + op: "Add" + input: "Mul_405" + input: "Mul_406" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_74" + op: "Sqrt" + input: "add_254" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_255/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_255" + op: "Add" + input: "Sqrt_74" + input: "add_255/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_75" + op: "RealDiv" + input: "add_253" + input: "add_255" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_407" + op: "Mul" + input: "add" + input: "truediv_75" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_75" + op: "Sub" + input: "bert/encoder/layer_4/attention/self/value/bias/read" + input: "mul_407" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_421" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/bias" + input: "sub_75" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_422" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/bias/adam_m" + input: "add_253" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_423" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/bias/adam_v" + input: "add_254" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + 
key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + 
value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_408/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_408" + op: "Mul" + input: "Mul_408/x" + input: 
"bert/encoder/layer_4/attention/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_409/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_409" + op: "Mul" + input: "Mul_409/x" + input: "clip_by_global_norm/clip_by_global_norm/_75" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_256" + op: "Add" + input: "Mul_408" + input: "Mul_409" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_410/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_410" + op: "Mul" + input: "Mul_410/x" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_75" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_75" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_411/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_411" + op: "Mul" + input: "Mul_411/x" + input: "Square_75" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_257" + op: "Add" + input: "Mul_410" + input: "Mul_411" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_75" + op: "Sqrt" + input: "add_257" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_258/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_258" + op: "Add" + input: "Sqrt_75" + input: "add_258/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + 
attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_76" + op: "RealDiv" + input: "add_256" + input: "add_258" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_412/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_412" + op: "Mul" + input: "mul_412/x" + input: "bert/encoder/layer_4/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_259" + op: "Add" + input: "truediv_76" + input: "mul_412" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_413" + op: "Mul" + input: "add" + input: "add_259" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_76" + op: "Sub" + input: "bert/encoder/layer_4/attention/output/dense/kernel/read" + input: "mul_413" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_424" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/kernel" + input: "sub_76" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_425" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + input: "add_256" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_426" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + input: "add_257" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: 
"bert/encoder/layer_4/attention/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_4/attention/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_4/attention/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" 
+ value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_414/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_414" + op: "Mul" + input: "Mul_414/x" + input: "bert/encoder/layer_4/attention/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_415/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_415" + op: "Mul" + input: "Mul_415/x" + input: "clip_by_global_norm/clip_by_global_norm/_76" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_260" + op: "Add" + input: "Mul_414" + input: "Mul_415" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_416/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_416" + op: "Mul" + input: "Mul_416/x" + input: "bert/encoder/layer_4/attention/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_76" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_76" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_417/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_417" + op: "Mul" + input: "Mul_417/x" + input: "Square_76" + attr { + key: "T" + value { + type: DT_FLOAT + } 
+ } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_261" + op: "Add" + input: "Mul_416" + input: "Mul_417" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_76" + op: "Sqrt" + input: "add_261" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_262/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_262" + op: "Add" + input: "Sqrt_76" + input: "add_262/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_77" + op: "RealDiv" + input: "add_260" + input: "add_262" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_418" + op: "Mul" + input: "add" + input: "truediv_77" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_77" + op: "Sub" + input: "bert/encoder/layer_4/attention/output/dense/bias/read" + input: "mul_418" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_427" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/bias" + input: "sub_77" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_428" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/bias/adam_m" + input: "add_260" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_429" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/bias/adam_v" + input: "add_261" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: 
"bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + input: 
"bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_419/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_419" + op: "Mul" + input: "Mul_419/x" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_420/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_420" + op: "Mul" + input: "Mul_420/x" + input: "clip_by_global_norm/clip_by_global_norm/_77" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_263" + op: "Add" + input: "Mul_419" + input: "Mul_420" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_421/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_421" + op: "Mul" + input: "Mul_421/x" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_77" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_77" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_422/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node 
{ + name: "Mul_422" + op: "Mul" + input: "Mul_422/x" + input: "Square_77" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_264" + op: "Add" + input: "Mul_421" + input: "Mul_422" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_77" + op: "Sqrt" + input: "add_264" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_265/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_265" + op: "Add" + input: "Sqrt_77" + input: "add_265/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_78" + op: "RealDiv" + input: "add_263" + input: "add_265" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_423" + op: "Mul" + input: "add" + input: "truediv_78" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_78" + op: "Sub" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/read" + input: "mul_423" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_430" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta" + input: "sub_78" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_431" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + input: "add_263" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_432" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + input: "add_264" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + 
attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: 
"bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_424/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_424" + op: "Mul" + input: "Mul_424/x" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_425/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_425" + op: "Mul" + input: "Mul_425/x" + input: "clip_by_global_norm/clip_by_global_norm/_78" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_266" + op: "Add" + input: "Mul_424" + input: "Mul_425" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_426/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_426" + op: "Mul" + input: "Mul_426/x" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_78" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_78" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_427/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: 
DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_427" + op: "Mul" + input: "Mul_427/x" + input: "Square_78" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_267" + op: "Add" + input: "Mul_426" + input: "Mul_427" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_78" + op: "Sqrt" + input: "add_267" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_268/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_268" + op: "Add" + input: "Sqrt_78" + input: "add_268/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_79" + op: "RealDiv" + input: "add_266" + input: "add_268" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_428" + op: "Mul" + input: "add" + input: "truediv_79" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_79" + op: "Sub" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/read" + input: "mul_428" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_433" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma" + input: "sub_79" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_434" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + input: "add_266" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_435" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + input: "add_267" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + 
size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + 
shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node 
{ + name: "Mul_429/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_429" + op: "Mul" + input: "Mul_429/x" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_430/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_430" + op: "Mul" + input: "Mul_430/x" + input: "clip_by_global_norm/clip_by_global_norm/_79" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_269" + op: "Add" + input: "Mul_429" + input: "Mul_430" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_431/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_431" + op: "Mul" + input: "Mul_431/x" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_79" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_79" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_432/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_432" + op: "Mul" + input: "Mul_432/x" + input: "Square_79" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_270" + op: "Add" + input: "Mul_431" + input: "Mul_432" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_79" + op: "Sqrt" + input: "add_270" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_271/y" + op: "Const" + attr { + key: "_output_shapes" + 
value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_271" + op: "Add" + input: "Sqrt_79" + input: "add_271/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_80" + op: "RealDiv" + input: "add_269" + input: "add_271" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_433/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_433" + op: "Mul" + input: "mul_433/x" + input: "bert/encoder/layer_4/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_272" + op: "Add" + input: "truediv_80" + input: "mul_433" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_434" + op: "Mul" + input: "add" + input: "add_272" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_80" + op: "Sub" + input: "bert/encoder/layer_4/intermediate/dense/kernel/read" + input: "mul_434" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_436" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/kernel" + input: "sub_80" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_437" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + input: "add_269" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_438" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + input: "add_270" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/bias/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + 
shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/bias/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_435/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + 
tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_435" + op: "Mul" + input: "Mul_435/x" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_436/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_436" + op: "Mul" + input: "Mul_436/x" + input: "clip_by_global_norm/clip_by_global_norm/_80" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_273" + op: "Add" + input: "Mul_435" + input: "Mul_436" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_437/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_437" + op: "Mul" + input: "Mul_437/x" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_80" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_80" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_438/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_438" + op: "Mul" + input: "Mul_438/x" + input: "Square_80" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_274" + op: "Add" + input: "Mul_437" + input: "Mul_438" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_80" + op: "Sqrt" + input: "add_274" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_275/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_275" + op: "Add" + input: "Sqrt_80" + input: "add_275/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + 
value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_81" + op: "RealDiv" + input: "add_273" + input: "add_275" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_439" + op: "Mul" + input: "add" + input: "truediv_81" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_81" + op: "Sub" + input: "bert/encoder/layer_4/intermediate/dense/bias/read" + input: "mul_439" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_439" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/bias" + input: "sub_81" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_440" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_m" + input: "add_273" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_441" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_v" + input: "add_274" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: 
"bert/encoder/layer_4/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_4/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/kernel/adam_m" + input: "bert/encoder/layer_4/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_4/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_4/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_4/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: 
"T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/kernel/adam_v" + input: "bert/encoder/layer_4/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_4/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_440/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_440" + op: "Mul" + input: "Mul_440/x" + input: "bert/encoder/layer_4/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_441/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_441" + op: "Mul" + input: "Mul_441/x" + input: "clip_by_global_norm/clip_by_global_norm/_81" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_276" + op: "Add" + input: "Mul_440" + input: "Mul_441" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: 
"Mul_442/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_442" + op: "Mul" + input: "Mul_442/x" + input: "bert/encoder/layer_4/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_81" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_81" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_443/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_443" + op: "Mul" + input: "Mul_443/x" + input: "Square_81" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_277" + op: "Add" + input: "Mul_442" + input: "Mul_443" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_81" + op: "Sqrt" + input: "add_277" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_278/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_278" + op: "Add" + input: "Sqrt_81" + input: "add_278/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_82" + op: "RealDiv" + input: "add_276" + input: "add_278" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_444/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_444" + op: "Mul" + input: "mul_444/x" + input: "bert/encoder/layer_4/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_279" + op: "Add" + input: "truediv_82" + input: "mul_444" + attr { + key: "T" + value { + type: DT_FLOAT + } 
+ } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_445" + op: "Mul" + input: "add" + input: "add_279" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_82" + op: "Sub" + input: "bert/encoder/layer_4/output/dense/kernel/read" + input: "mul_445" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_442" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/kernel" + input: "sub_82" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_443" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/kernel/adam_m" + input: "add_276" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_444" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/kernel/adam_v" + input: "add_277" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/bias/adam_m" + input: 
"bert/encoder/layer_4/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_4/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/bias/adam_v" + input: "bert/encoder/layer_4/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_4/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_446/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_446" + op: "Mul" + input: "Mul_446/x" + input: "bert/encoder/layer_4/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } 
+} +node { + name: "Mul_447/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_447" + op: "Mul" + input: "Mul_447/x" + input: "clip_by_global_norm/clip_by_global_norm/_82" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_280" + op: "Add" + input: "Mul_446" + input: "Mul_447" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_448/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_448" + op: "Mul" + input: "Mul_448/x" + input: "bert/encoder/layer_4/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_82" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_82" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_449/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_449" + op: "Mul" + input: "Mul_449/x" + input: "Square_82" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_281" + op: "Add" + input: "Mul_448" + input: "Mul_449" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_82" + op: "Sqrt" + input: "add_281" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_282/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_282" + op: "Add" + input: "Sqrt_82" + input: "add_282/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_83" + op: "RealDiv" + input: "add_280" + input: "add_282" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_450" + op: "Mul" + input: "add" + input: "truediv_83" + 
attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_83" + op: "Sub" + input: "bert/encoder/layer_4/output/dense/bias/read" + input: "mul_450" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_445" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/bias" + input: "sub_83" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_446" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/bias/adam_m" + input: "add_280" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_447" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/bias/adam_v" + input: "add_281" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: 
"use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_451/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_451" + op: "Mul" + input: "Mul_451/x" + input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_452/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 
0.10000000149011612 + } + } + } +} +node { + name: "Mul_452" + op: "Mul" + input: "Mul_452/x" + input: "clip_by_global_norm/clip_by_global_norm/_83" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_283" + op: "Add" + input: "Mul_451" + input: "Mul_452" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_453/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_453" + op: "Mul" + input: "Mul_453/x" + input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_83" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_83" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_454/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_454" + op: "Mul" + input: "Mul_454/x" + input: "Square_83" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_284" + op: "Add" + input: "Mul_453" + input: "Mul_454" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_83" + op: "Sqrt" + input: "add_284" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_285/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_285" + op: "Add" + input: "Sqrt_83" + input: "add_285/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_84" + op: "RealDiv" + input: "add_283" + input: "add_285" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_455" + op: "Mul" + input: "add" + input: "truediv_84" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_84" + op: "Sub" + input: "bert/encoder/layer_4/output/LayerNorm/beta/read" + input: "mul_455" + 
attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_448" + op: "Assign" + input: "bert/encoder/layer_4/output/LayerNorm/beta" + input: "sub_84" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_449" + op: "Assign" + input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + input: "add_283" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_450" + op: "Assign" + input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + input: "add_284" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: 
"bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_456/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_456" + op: "Mul" + input: "Mul_456/x" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_457/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_457" + op: "Mul" + input: "Mul_457/x" + input: "clip_by_global_norm/clip_by_global_norm/_84" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + 
attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_286" + op: "Add" + input: "Mul_456" + input: "Mul_457" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_458/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_458" + op: "Mul" + input: "Mul_458/x" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_84" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_84" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_459/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_459" + op: "Mul" + input: "Mul_459/x" + input: "Square_84" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_287" + op: "Add" + input: "Mul_458" + input: "Mul_459" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_84" + op: "Sqrt" + input: "add_287" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_288/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_288" + op: "Add" + input: "Sqrt_84" + input: "add_288/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_85" + op: "RealDiv" + input: "add_286" + input: "add_288" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_460" + op: "Mul" + input: "add" + input: "truediv_85" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_85" + op: "Sub" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/read" + input: "mul_460" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_451" + op: "Assign" + 
input: "bert/encoder/layer_4/output/LayerNorm/gamma" + input: "sub_85" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_452" + op: "Assign" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + input: "add_286" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_453" + op: "Assign" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + input: "add_287" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + 
dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + 
key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_461/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_461" + op: "Mul" + input: "Mul_461/x" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_462/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_462" + op: "Mul" + input: "Mul_462/x" + input: "clip_by_global_norm/clip_by_global_norm/_85" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_289" + op: "Add" + input: "Mul_461" + input: "Mul_462" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_463/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_463" + op: "Mul" + input: "Mul_463/x" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: 
"Square_85" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_85" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_464/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_464" + op: "Mul" + input: "Mul_464/x" + input: "Square_85" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_290" + op: "Add" + input: "Mul_463" + input: "Mul_464" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_85" + op: "Sqrt" + input: "add_290" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_291/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_291" + op: "Add" + input: "Sqrt_85" + input: "add_291/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_86" + op: "RealDiv" + input: "add_289" + input: "add_291" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_465/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_465" + op: "Mul" + input: "mul_465/x" + input: "bert/encoder/layer_5/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_292" + op: "Add" + input: "truediv_86" + input: "mul_465" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_466" + op: "Mul" + input: "add" + input: "add_292" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_86" + op: "Sub" + input: "bert/encoder/layer_5/attention/self/query/kernel/read" + input: "mul_466" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { 
+ shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_454" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/kernel" + input: "sub_86" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_455" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_m" + input: "add_289" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_456" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_v" + input: "add_290" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_5/attention/self/query/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/bias/adam_m/read" + 
op: "Identity" + input: "bert/encoder/layer_5/attention/self/query/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_5/attention/self/query/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/self/query/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_467/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_467" + op: "Mul" + input: "Mul_467/x" + input: "bert/encoder/layer_5/attention/self/query/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_468/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_468" + op: "Mul" + input: "Mul_468/x" + input: 
"clip_by_global_norm/clip_by_global_norm/_86" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_293" + op: "Add" + input: "Mul_467" + input: "Mul_468" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_469/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_469" + op: "Mul" + input: "Mul_469/x" + input: "bert/encoder/layer_5/attention/self/query/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_86" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_86" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_470/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_470" + op: "Mul" + input: "Mul_470/x" + input: "Square_86" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_294" + op: "Add" + input: "Mul_469" + input: "Mul_470" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_86" + op: "Sqrt" + input: "add_294" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_295/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_295" + op: "Add" + input: "Sqrt_86" + input: "add_295/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_87" + op: "RealDiv" + input: "add_293" + input: "add_295" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_471" + op: "Mul" + input: "add" + input: "truediv_87" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_87" + op: "Sub" + input: "bert/encoder/layer_5/attention/self/query/bias/read" + input: "mul_471" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + 
list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_457" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/bias" + input: "sub_87" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_458" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/bias/adam_m" + input: "add_293" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_459" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/bias/adam_v" + input: "add_294" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_v" + } + } + } + attr { + 
key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_472/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_472" + op: "Mul" + input: "Mul_472/x" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_473/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_473" + op: "Mul" + input: "Mul_473/x" + input: "clip_by_global_norm/clip_by_global_norm/_87" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_296" + op: "Add" + input: "Mul_472" + input: "Mul_473" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_474/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_474" + op: "Mul" + input: "Mul_474/x" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + 
list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_87" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_87" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_475/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_475" + op: "Mul" + input: "Mul_475/x" + input: "Square_87" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_297" + op: "Add" + input: "Mul_474" + input: "Mul_475" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_87" + op: "Sqrt" + input: "add_297" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_298/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_298" + op: "Add" + input: "Sqrt_87" + input: "add_298/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_88" + op: "RealDiv" + input: "add_296" + input: "add_298" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_476/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_476" + op: "Mul" + input: "mul_476/x" + input: "bert/encoder/layer_5/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_299" + op: "Add" + input: "truediv_88" + input: "mul_476" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_477" + op: "Mul" + input: "add" + input: "add_299" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_88" + op: "Sub" + input: "bert/encoder/layer_5/attention/self/key/kernel/read" + input: "mul_477" + attr { + 
key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_460" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/key/kernel" + input: "sub_88" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_461" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_m" + input: "add_296" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_462" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_v" + input: "add_297" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_5/attention/self/key/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + 
name: "bert/encoder/layer_5/attention/self/key/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/self/key/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_5/attention/self/key/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/self/key/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_478/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_478" + op: "Mul" + input: "Mul_478/x" + input: "bert/encoder/layer_5/attention/self/key/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_479/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_479" + op: "Mul" + input: "Mul_479/x" + 
input: "clip_by_global_norm/clip_by_global_norm/_88" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_300" + op: "Add" + input: "Mul_478" + input: "Mul_479" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_480/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_480" + op: "Mul" + input: "Mul_480/x" + input: "bert/encoder/layer_5/attention/self/key/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_88" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_88" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_481/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_481" + op: "Mul" + input: "Mul_481/x" + input: "Square_88" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_301" + op: "Add" + input: "Mul_480" + input: "Mul_481" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_88" + op: "Sqrt" + input: "add_301" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_302/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_302" + op: "Add" + input: "Sqrt_88" + input: "add_302/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_89" + op: "RealDiv" + input: "add_300" + input: "add_302" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_482" + op: "Mul" + input: "add" + input: "truediv_89" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_89" + op: "Sub" + input: "bert/encoder/layer_5/attention/self/key/bias/read" + input: "mul_482" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { 
+ list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_463" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/key/bias" + input: "sub_89" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_464" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/key/bias/adam_m" + input: "add_300" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_465" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/key/bias/adam_v" + input: "add_301" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_483/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_483" + op: "Mul" + input: "Mul_483/x" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_484/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_484" + op: "Mul" + input: "Mul_484/x" + input: "clip_by_global_norm/clip_by_global_norm/_89" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_303" + op: "Add" + input: "Mul_483" + input: "Mul_484" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_485/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_485" + op: "Mul" + input: "Mul_485/x" + input: 
"bert/encoder/layer_5/attention/self/value/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_89" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_89" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_486/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_486" + op: "Mul" + input: "Mul_486/x" + input: "Square_89" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_304" + op: "Add" + input: "Mul_485" + input: "Mul_486" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_89" + op: "Sqrt" + input: "add_304" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_305/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_305" + op: "Add" + input: "Sqrt_89" + input: "add_305/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_90" + op: "RealDiv" + input: "add_303" + input: "add_305" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_487/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_487" + op: "Mul" + input: "mul_487/x" + input: "bert/encoder/layer_5/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_306" + op: "Add" + input: "truediv_90" + input: "mul_487" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_488" + op: "Mul" + input: "add" + input: "add_306" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { 
+ size: 768 + } + } + } + } + } +} +node { + name: "sub_90" + op: "Sub" + input: "bert/encoder/layer_5/attention/self/value/kernel/read" + input: "mul_488" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_466" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/kernel" + input: "sub_90" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_467" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_m" + input: "add_303" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_468" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_v" + input: "add_304" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_5/attention/self/value/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { 
+ list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/self/value/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_5/attention/self/value/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/self/value/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_489/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_489" + op: "Mul" + input: "Mul_489/x" + input: "bert/encoder/layer_5/attention/self/value/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_490/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + 
type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_490" + op: "Mul" + input: "Mul_490/x" + input: "clip_by_global_norm/clip_by_global_norm/_90" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_307" + op: "Add" + input: "Mul_489" + input: "Mul_490" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_491/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_491" + op: "Mul" + input: "Mul_491/x" + input: "bert/encoder/layer_5/attention/self/value/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_90" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_90" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_492/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_492" + op: "Mul" + input: "Mul_492/x" + input: "Square_90" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_308" + op: "Add" + input: "Mul_491" + input: "Mul_492" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_90" + op: "Sqrt" + input: "add_308" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_309/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_309" + op: "Add" + input: "Sqrt_90" + input: "add_309/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_91" + op: "RealDiv" + input: "add_307" + input: "add_309" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_493" + op: "Mul" + input: "add" + input: "truediv_91" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } 
+} +node { + name: "sub_91" + op: "Sub" + input: "bert/encoder/layer_5/attention/self/value/bias/read" + input: "mul_493" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_469" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/bias" + input: "sub_91" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_470" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/bias/adam_m" + input: "add_307" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_471" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/bias/adam_v" + input: "add_308" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + 
key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" 
+ value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_494/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_494" + op: "Mul" + input: "Mul_494/x" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_495/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_495" + op: "Mul" + input: "Mul_495/x" + input: "clip_by_global_norm/clip_by_global_norm/_91" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_310" + op: "Add" + input: "Mul_494" + input: "Mul_495" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_496/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: 
DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_496" + op: "Mul" + input: "Mul_496/x" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_91" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_91" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_497/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_497" + op: "Mul" + input: "Mul_497/x" + input: "Square_91" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_311" + op: "Add" + input: "Mul_496" + input: "Mul_497" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_91" + op: "Sqrt" + input: "add_311" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_312/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_312" + op: "Add" + input: "Sqrt_91" + input: "add_312/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_92" + op: "RealDiv" + input: "add_310" + input: "add_312" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_498/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_498" + op: "Mul" + input: "mul_498/x" + input: "bert/encoder/layer_5/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_313" + op: "Add" + input: "truediv_92" + input: "mul_498" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_499" + op: "Mul" + input: "add" + input: "add_313" 
+ attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_92" + op: "Sub" + input: "bert/encoder/layer_5/attention/output/dense/kernel/read" + input: "mul_499" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_472" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/kernel" + input: "sub_92" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_473" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + input: "add_310" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_474" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + input: "add_311" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_5/attention/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: 
DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_5/attention/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_500/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_500" + op: "Mul" + input: "Mul_500/x" + input: "bert/encoder/layer_5/attention/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + 
list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_501/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_501" + op: "Mul" + input: "Mul_501/x" + input: "clip_by_global_norm/clip_by_global_norm/_92" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_314" + op: "Add" + input: "Mul_500" + input: "Mul_501" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_502/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_502" + op: "Mul" + input: "Mul_502/x" + input: "bert/encoder/layer_5/attention/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_92" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_92" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_503/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_503" + op: "Mul" + input: "Mul_503/x" + input: "Square_92" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_315" + op: "Add" + input: "Mul_502" + input: "Mul_503" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_92" + op: "Sqrt" + input: "add_315" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_316/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_316" + op: "Add" + input: "Sqrt_92" + input: "add_316/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_93" + op: "RealDiv" + input: "add_314" + input: "add_316" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + 
name: "mul_504" + op: "Mul" + input: "add" + input: "truediv_93" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_93" + op: "Sub" + input: "bert/encoder/layer_5/attention/output/dense/bias/read" + input: "mul_504" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_475" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/bias" + input: "sub_93" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_476" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/bias/adam_m" + input: "add_314" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_477" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/bias/adam_v" + input: "add_315" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + 
value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_505/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_505" + op: "Mul" + input: "Mul_505/x" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_506/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_506" + op: "Mul" + input: "Mul_506/x" + input: "clip_by_global_norm/clip_by_global_norm/_93" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_317" + op: "Add" + input: "Mul_505" + input: "Mul_506" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_507/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_507" + op: "Mul" + input: "Mul_507/x" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_93" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_93" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_508/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_508" + op: "Mul" + input: "Mul_508/x" + input: "Square_93" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_318" + op: "Add" + input: "Mul_507" + input: "Mul_508" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_93" + op: "Sqrt" + input: "add_318" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_319/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_319" + op: "Add" + input: "Sqrt_93" + input: "add_319/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_94" + op: "RealDiv" + input: "add_317" + input: "add_319" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } 
+ } + } + } + } +} +node { + name: "mul_509" + op: "Mul" + input: "add" + input: "truediv_94" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_94" + op: "Sub" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/read" + input: "mul_509" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_478" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta" + input: "sub_94" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_479" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + input: "add_317" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_480" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + input: "add_318" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" 
+ value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_510/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_510" + op: "Mul" + input: "Mul_510/x" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m/read" + 
attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_511/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_511" + op: "Mul" + input: "Mul_511/x" + input: "clip_by_global_norm/clip_by_global_norm/_94" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_320" + op: "Add" + input: "Mul_510" + input: "Mul_511" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_512/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_512" + op: "Mul" + input: "Mul_512/x" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_94" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_94" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_513/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_513" + op: "Mul" + input: "Mul_513/x" + input: "Square_94" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_321" + op: "Add" + input: "Mul_512" + input: "Mul_513" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_94" + op: "Sqrt" + input: "add_321" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_322/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_322" + op: "Add" + input: "Sqrt_94" + input: "add_322/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_95" + op: "RealDiv" + input: "add_320" + input: "add_322" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_514" + op: "Mul" + input: "add" + input: "truediv_95" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_95" + op: "Sub" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/read" + input: "mul_514" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_481" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma" + input: "sub_95" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_482" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + input: "add_320" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_483" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + input: "add_321" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: 
"bert/encoder/layer_5/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + 
attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_515/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_515" + op: "Mul" + input: "Mul_515/x" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_516/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_516" + op: "Mul" + input: "Mul_516/x" + input: "clip_by_global_norm/clip_by_global_norm/_95" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_323" + op: "Add" + input: "Mul_515" + input: "Mul_516" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { 
+ size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_517/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_517" + op: "Mul" + input: "Mul_517/x" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_95" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_95" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_518/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_518" + op: "Mul" + input: "Mul_518/x" + input: "Square_95" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_324" + op: "Add" + input: "Mul_517" + input: "Mul_518" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_95" + op: "Sqrt" + input: "add_324" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_325/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_325" + op: "Add" + input: "Sqrt_95" + input: "add_325/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_96" + op: "RealDiv" + input: "add_323" + input: "add_325" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_519/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_519" + op: "Mul" + input: "mul_519/x" + input: "bert/encoder/layer_5/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_326" + op: "Add" + 
input: "truediv_96" + input: "mul_519" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_520" + op: "Mul" + input: "add" + input: "add_326" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_96" + op: "Sub" + input: "bert/encoder/layer_5/intermediate/dense/kernel/read" + input: "mul_520" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_484" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/kernel" + input: "sub_96" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_485" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + input: "add_323" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_486" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + input: "add_324" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/bias/adam_m/Initializer/zeros" + op: "Fill" + 
input: "bert/encoder/layer_5/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/bias/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { 
+ list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_521/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_521" + op: "Mul" + input: "Mul_521/x" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_522/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_522" + op: "Mul" + input: "Mul_522/x" + input: "clip_by_global_norm/clip_by_global_norm/_96" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_327" + op: "Add" + input: "Mul_521" + input: "Mul_522" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_523/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT 
+ tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_523" + op: "Mul" + input: "Mul_523/x" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_96" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_96" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_524/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_524" + op: "Mul" + input: "Mul_524/x" + input: "Square_96" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_328" + op: "Add" + input: "Mul_523" + input: "Mul_524" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_96" + op: "Sqrt" + input: "add_328" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_329/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_329" + op: "Add" + input: "Sqrt_96" + input: "add_329/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_97" + op: "RealDiv" + input: "add_327" + input: "add_329" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_525" + op: "Mul" + input: "add" + input: "truediv_97" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_97" + op: "Sub" + input: "bert/encoder/layer_5/intermediate/dense/bias/read" + input: "mul_525" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_487" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/bias" + input: "sub_97" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_488" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_m" 
+ input: "add_327" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_489" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_v" + input: "add_328" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_5/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_5/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_5/output/dense/kernel/adam_m" + input: "bert/encoder/layer_5/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_5/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_5/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_5/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_5/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_5/output/dense/kernel/adam_v" + input: "bert/encoder/layer_5/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + 
} + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_5/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_526/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_526" + op: "Mul" + input: "Mul_526/x" + input: "bert/encoder/layer_5/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_527/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_527" + op: "Mul" + input: "Mul_527/x" + input: "clip_by_global_norm/clip_by_global_norm/_97" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_330" + op: "Add" + input: "Mul_526" + input: "Mul_527" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_528/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_528" + op: "Mul" + input: "Mul_528/x" + input: "bert/encoder/layer_5/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_97" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_97" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_529/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_529" + op: "Mul" + input: "Mul_529/x" + input: "Square_97" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node 
{ + name: "add_331" + op: "Add" + input: "Mul_528" + input: "Mul_529" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_97" + op: "Sqrt" + input: "add_331" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_332/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_332" + op: "Add" + input: "Sqrt_97" + input: "add_332/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_98" + op: "RealDiv" + input: "add_330" + input: "add_332" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_530/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_530" + op: "Mul" + input: "mul_530/x" + input: "bert/encoder/layer_5/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_333" + op: "Add" + input: "truediv_98" + input: "mul_530" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_531" + op: "Mul" + input: "add" + input: "add_333" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_98" + op: "Sub" + input: "bert/encoder/layer_5/output/dense/kernel/read" + input: "mul_531" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_490" + op: "Assign" + input: "bert/encoder/layer_5/output/dense/kernel" + input: "sub_98" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_491" + op: "Assign" + input: "bert/encoder/layer_5/output/dense/kernel/adam_m" + input: "add_330" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_5/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_492" + op: "Assign" + input: "bert/encoder/layer_5/output/dense/kernel/adam_v" + input: "add_331" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_5/output/dense/bias/adam_m" + input: "bert/encoder/layer_5/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_5/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/bias/adam_v" + op: "VariableV2" + attr { + 
key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_5/output/dense/bias/adam_v" + input: "bert/encoder/layer_5/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_5/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_532/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_532" + op: "Mul" + input: "Mul_532/x" + input: "bert/encoder/layer_5/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_533/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_533" + op: "Mul" + input: "Mul_533/x" + input: "clip_by_global_norm/clip_by_global_norm/_98" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_334" + op: "Add" + input: "Mul_532" + input: "Mul_533" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_534/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_534" + op: "Mul" + input: "Mul_534/x" + input: "bert/encoder/layer_5/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_98" + op: "Square" + input: 
"clip_by_global_norm/clip_by_global_norm/_98" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_535/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_535" + op: "Mul" + input: "Mul_535/x" + input: "Square_98" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_335" + op: "Add" + input: "Mul_534" + input: "Mul_535" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_98" + op: "Sqrt" + input: "add_335" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_336/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_336" + op: "Add" + input: "Sqrt_98" + input: "add_336/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_99" + op: "RealDiv" + input: "add_334" + input: "add_336" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_536" + op: "Mul" + input: "add" + input: "truediv_99" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_99" + op: "Sub" + input: "bert/encoder/layer_5/output/dense/bias/read" + input: "mul_536" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_493" + op: "Assign" + input: "bert/encoder/layer_5/output/dense/bias" + input: "sub_99" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_494" + op: "Assign" + input: "bert/encoder/layer_5/output/dense/bias/adam_m" + input: "add_334" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_495" 
+ op: "Assign" + input: "bert/encoder/layer_5/output/dense/bias/adam_v" + input: "add_335" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } 
+ } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_537/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_537" + op: "Mul" + input: "Mul_537/x" + input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_538/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_538" + op: "Mul" + input: "Mul_538/x" + input: "clip_by_global_norm/clip_by_global_norm/_99" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_337" + op: "Add" + input: "Mul_537" + input: "Mul_538" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_539/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_539" + op: "Mul" + input: "Mul_539/x" + input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_99" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_99" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_540/x" + op: "Const" + attr { + key: "_output_shapes" + value { + 
list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_540" + op: "Mul" + input: "Mul_540/x" + input: "Square_99" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_338" + op: "Add" + input: "Mul_539" + input: "Mul_540" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_99" + op: "Sqrt" + input: "add_338" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_339/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_339" + op: "Add" + input: "Sqrt_99" + input: "add_339/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_100" + op: "RealDiv" + input: "add_337" + input: "add_339" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_541" + op: "Mul" + input: "add" + input: "truediv_100" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_100" + op: "Sub" + input: "bert/encoder/layer_5/output/LayerNorm/beta/read" + input: "mul_541" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_496" + op: "Assign" + input: "bert/encoder/layer_5/output/LayerNorm/beta" + input: "sub_100" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_497" + op: "Assign" + input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + input: "add_337" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_498" + op: "Assign" + input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + input: "add_338" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + } + } + } 
+ attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: 
"bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_542/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_542" + op: "Mul" + input: "Mul_542/x" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_543/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_543" + op: "Mul" + input: "Mul_543/x" + input: "clip_by_global_norm/clip_by_global_norm/_100" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_340" + op: "Add" + input: "Mul_542" + input: "Mul_543" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_544/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_544" + op: "Mul" + input: "Mul_544/x" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_100" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_100" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_545/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} 
+node { + name: "Mul_545" + op: "Mul" + input: "Mul_545/x" + input: "Square_100" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_341" + op: "Add" + input: "Mul_544" + input: "Mul_545" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_100" + op: "Sqrt" + input: "add_341" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_342/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_342" + op: "Add" + input: "Sqrt_100" + input: "add_342/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_101" + op: "RealDiv" + input: "add_340" + input: "add_342" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_546" + op: "Mul" + input: "add" + input: "truediv_101" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_101" + op: "Sub" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/read" + input: "mul_546" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_499" + op: "Assign" + input: "bert/encoder/layer_5/output/LayerNorm/gamma" + input: "sub_101" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_500" + op: "Assign" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + input: "add_340" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_501" + op: "Assign" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + input: "add_341" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + 
} + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: 
"bert/encoder/layer_6/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_547/x" + op: "Const" + attr { + key: 
"_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_547" + op: "Mul" + input: "Mul_547/x" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_548/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_548" + op: "Mul" + input: "Mul_548/x" + input: "clip_by_global_norm/clip_by_global_norm/_101" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_343" + op: "Add" + input: "Mul_547" + input: "Mul_548" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_549/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_549" + op: "Mul" + input: "Mul_549/x" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_101" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_101" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_550/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_550" + op: "Mul" + input: "Mul_550/x" + input: "Square_101" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_344" + op: "Add" + input: "Mul_549" + input: "Mul_550" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_101" + op: "Sqrt" + input: "add_344" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_345/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr 
{ + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_345" + op: "Add" + input: "Sqrt_101" + input: "add_345/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_102" + op: "RealDiv" + input: "add_343" + input: "add_345" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_551/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_551" + op: "Mul" + input: "mul_551/x" + input: "bert/encoder/layer_6/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_346" + op: "Add" + input: "truediv_102" + input: "mul_551" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_552" + op: "Mul" + input: "add" + input: "add_346" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_102" + op: "Sub" + input: "bert/encoder/layer_6/attention/self/query/kernel/read" + input: "mul_552" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_502" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/kernel" + input: "sub_102" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_503" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_m" + input: "add_343" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_504" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_v" + input: "add_344" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_6/attention/self/query/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/self/query/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: 
"shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_6/attention/self/query/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/self/query/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_553/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_553" + op: "Mul" + input: "Mul_553/x" + input: "bert/encoder/layer_6/attention/self/query/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_554/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_554" + op: "Mul" + input: "Mul_554/x" + input: "clip_by_global_norm/clip_by_global_norm/_102" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_347" + op: "Add" + input: "Mul_553" + input: "Mul_554" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_555/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_555" + op: "Mul" + input: "Mul_555/x" + input: "bert/encoder/layer_6/attention/self/query/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_102" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_102" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_556/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + 
value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_556" + op: "Mul" + input: "Mul_556/x" + input: "Square_102" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_348" + op: "Add" + input: "Mul_555" + input: "Mul_556" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_102" + op: "Sqrt" + input: "add_348" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_349/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_349" + op: "Add" + input: "Sqrt_102" + input: "add_349/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_103" + op: "RealDiv" + input: "add_347" + input: "add_349" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_557" + op: "Mul" + input: "add" + input: "truediv_103" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_103" + op: "Sub" + input: "bert/encoder/layer_6/attention/self/query/bias/read" + input: "mul_557" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_505" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/bias" + input: "sub_103" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_506" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/bias/adam_m" + input: "add_347" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_507" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/bias/adam_v" + input: "add_348" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_v" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_m" + } + } + } + attr 
{ + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { 
+ size: 768 + } + } + } + } + } +} +node { + name: "Mul_558/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_558" + op: "Mul" + input: "Mul_558/x" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_559/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_559" + op: "Mul" + input: "Mul_559/x" + input: "clip_by_global_norm/clip_by_global_norm/_103" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_350" + op: "Add" + input: "Mul_558" + input: "Mul_559" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_560/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_560" + op: "Mul" + input: "Mul_560/x" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_103" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_103" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_561/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_561" + op: "Mul" + input: "Mul_561/x" + input: "Square_103" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_351" + op: "Add" + input: "Mul_560" + input: "Mul_561" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_103" + op: "Sqrt" + input: "add_351" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_352/y" + op: 
"Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_352" + op: "Add" + input: "Sqrt_103" + input: "add_352/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_104" + op: "RealDiv" + input: "add_350" + input: "add_352" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_562/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_562" + op: "Mul" + input: "mul_562/x" + input: "bert/encoder/layer_6/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_353" + op: "Add" + input: "truediv_104" + input: "mul_562" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_563" + op: "Mul" + input: "add" + input: "add_353" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_104" + op: "Sub" + input: "bert/encoder/layer_6/attention/self/key/kernel/read" + input: "mul_563" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_508" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/kernel" + input: "sub_104" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_509" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_m" + input: "add_350" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_510" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_v" + input: "add_351" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" 
+ value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_6/attention/self/key/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/self/key/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: 
"shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_6/attention/self/key/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/self/key/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_564/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_564" + op: "Mul" + input: "Mul_564/x" + input: "bert/encoder/layer_6/attention/self/key/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_565/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_565" + op: "Mul" + input: "Mul_565/x" + input: "clip_by_global_norm/clip_by_global_norm/_104" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_354" + op: "Add" + input: "Mul_564" + input: "Mul_565" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_566/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_566" + op: "Mul" + input: "Mul_566/x" + input: "bert/encoder/layer_6/attention/self/key/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_104" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_104" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_567/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: 
DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_567" + op: "Mul" + input: "Mul_567/x" + input: "Square_104" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_355" + op: "Add" + input: "Mul_566" + input: "Mul_567" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_104" + op: "Sqrt" + input: "add_355" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_356/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_356" + op: "Add" + input: "Sqrt_104" + input: "add_356/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_105" + op: "RealDiv" + input: "add_354" + input: "add_356" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_568" + op: "Mul" + input: "add" + input: "truediv_105" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_105" + op: "Sub" + input: "bert/encoder/layer_6/attention/self/key/bias/read" + input: "mul_568" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_511" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/bias" + input: "sub_105" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_512" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/bias/adam_m" + input: "add_354" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_513" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/bias/adam_v" + input: "add_355" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + 
shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_m" + } + } + } + attr { + 
key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + 
dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_569/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_569" + op: "Mul" + input: "Mul_569/x" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_570/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_570" + op: "Mul" + input: "Mul_570/x" + input: "clip_by_global_norm/clip_by_global_norm/_105" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_357" + op: "Add" + input: "Mul_569" + input: "Mul_570" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_571/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_571" + op: "Mul" + input: "Mul_571/x" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_105" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_105" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_572/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_572" + op: "Mul" + input: "Mul_572/x" + input: "Square_105" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_358" + op: "Add" + input: "Mul_571" + input: "Mul_572" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_105" + op: "Sqrt" + input: "add_358" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} 
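The repeating node pattern above (and in the blocks that follow for every other variable) is the per-variable update of BERT-style Adam with decoupled weight decay: one Mul/Mul/Add triple forms the first-moment estimate, a second forms the second-moment estimate, Sqrt/Add/RealDiv forms the Adam ratio with the 9.999999974752427e-07 (~1e-6) epsilon constant, kernel variables get an extra 0.009999999776482582 (~0.01) decay term (the mul_562 / mul_573 / mul_584 nodes) that the bias and LayerNorm parameters skip, and the result is scaled by the "add" input (apparently the scheduled learning rate) before the Sub/Assign pair writes the new value and the adam_m / adam_v slots. A minimal NumPy sketch of one such step follows; the function name, argument names, and defaults are assumptions read off the constant nodes in this dump, not code from the repository.

import numpy as np

def adam_weight_decay_step(param, grad, m, v, lr,
                           beta_1=0.9, beta_2=0.999,
                           epsilon=1e-6, weight_decay=0.01,
                           apply_decay=True):
    # Hypothetical helper mirroring the per-variable node pattern in the dump.
    m_new = beta_1 * m + (1.0 - beta_1) * grad        # e.g. Mul_564 + Mul_565 -> add_354
    v_new = beta_2 * v + (1.0 - beta_2) * grad ** 2   # e.g. Mul_566 + Mul_567 -> add_355
    update = m_new / (np.sqrt(v_new) + epsilon)       # Sqrt_104, add_356, truediv_105
    if apply_decay:
        # Kernels only: mul_562 / mul_573 / mul_584 add 0.01 * param to the update.
        update = update + weight_decay * param
    param_new = param - lr * update                   # mul_563 / sub_104 -> Assign_*
    return param_new, m_new, v_new

Applying the decay term to the update itself, rather than folding it into the gradient, is what makes the weight decay "decoupled"; the dump shows it only on kernel variables, matching the kernel-vs-bias asymmetry visible in the surrounding nodes.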
+node { + name: "add_359/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_359" + op: "Add" + input: "Sqrt_105" + input: "add_359/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_106" + op: "RealDiv" + input: "add_357" + input: "add_359" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_573/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_573" + op: "Mul" + input: "mul_573/x" + input: "bert/encoder/layer_6/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_360" + op: "Add" + input: "truediv_106" + input: "mul_573" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_574" + op: "Mul" + input: "add" + input: "add_360" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_106" + op: "Sub" + input: "bert/encoder/layer_6/attention/self/value/kernel/read" + input: "mul_574" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_514" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/value/kernel" + input: "sub_106" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_515" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_m" + input: "add_357" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_516" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_v" + input: "add_358" + attr { + key: "T" + value { + 
type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_6/attention/self/value/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/self/value/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + 
value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_6/attention/self/value/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/self/value/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_575/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_575" + op: "Mul" + input: "Mul_575/x" + input: "bert/encoder/layer_6/attention/self/value/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_576/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_576" + op: "Mul" + input: "Mul_576/x" + input: "clip_by_global_norm/clip_by_global_norm/_106" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_361" + op: "Add" + input: "Mul_575" + input: "Mul_576" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_577/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_577" + op: "Mul" + input: "Mul_577/x" + input: "bert/encoder/layer_6/attention/self/value/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_106" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_106" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_578/x" + op: "Const" + attr { + key: "_output_shapes" + 
value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_578" + op: "Mul" + input: "Mul_578/x" + input: "Square_106" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_362" + op: "Add" + input: "Mul_577" + input: "Mul_578" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_106" + op: "Sqrt" + input: "add_362" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_363/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_363" + op: "Add" + input: "Sqrt_106" + input: "add_363/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_107" + op: "RealDiv" + input: "add_361" + input: "add_363" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_579" + op: "Mul" + input: "add" + input: "truediv_107" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_107" + op: "Sub" + input: "bert/encoder/layer_6/attention/self/value/bias/read" + input: "mul_579" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_517" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/value/bias" + input: "sub_107" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_518" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/value/bias/adam_m" + input: "add_361" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_519" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/value/bias/adam_v" + input: "add_362" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_6/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + attr { + key: "T" + value { 
+ type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + 
} + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_580/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_580" + op: "Mul" + input: "Mul_580/x" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_581/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_581" + op: "Mul" + input: "Mul_581/x" + input: "clip_by_global_norm/clip_by_global_norm/_107" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_364" + op: "Add" + input: "Mul_580" + input: "Mul_581" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_582/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_582" + op: "Mul" + input: "Mul_582/x" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_107" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_107" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_583/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_583" + op: "Mul" + input: "Mul_583/x" + input: "Square_107" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_365" + op: "Add" + input: "Mul_582" + input: "Mul_583" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_107" + op: "Sqrt" + 
input: "add_365" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_366/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_366" + op: "Add" + input: "Sqrt_107" + input: "add_366/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_108" + op: "RealDiv" + input: "add_364" + input: "add_366" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_584/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_584" + op: "Mul" + input: "mul_584/x" + input: "bert/encoder/layer_6/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_367" + op: "Add" + input: "truediv_108" + input: "mul_584" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_585" + op: "Mul" + input: "add" + input: "add_367" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_108" + op: "Sub" + input: "bert/encoder/layer_6/attention/output/dense/kernel/read" + input: "mul_585" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_520" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/kernel" + input: "sub_108" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_521" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + input: "add_364" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + 
key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_522" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + input: "add_365" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_6/attention/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_v" + } 
+ } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_6/attention/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_586/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_586" + op: "Mul" + input: "Mul_586/x" + input: "bert/encoder/layer_6/attention/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_587/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_587" + op: "Mul" + input: "Mul_587/x" + input: "clip_by_global_norm/clip_by_global_norm/_108" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_368" + op: "Add" + input: "Mul_586" + input: "Mul_587" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_588/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_588" + op: "Mul" + input: "Mul_588/x" + input: "bert/encoder/layer_6/attention/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_108" + op: "Square" + input: 
"clip_by_global_norm/clip_by_global_norm/_108" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_589/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_589" + op: "Mul" + input: "Mul_589/x" + input: "Square_108" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_369" + op: "Add" + input: "Mul_588" + input: "Mul_589" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_108" + op: "Sqrt" + input: "add_369" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_370/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_370" + op: "Add" + input: "Sqrt_108" + input: "add_370/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_109" + op: "RealDiv" + input: "add_368" + input: "add_370" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_590" + op: "Mul" + input: "add" + input: "truediv_109" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_109" + op: "Sub" + input: "bert/encoder/layer_6/attention/output/dense/bias/read" + input: "mul_590" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_523" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/bias" + input: "sub_109" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_524" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/bias/adam_m" + input: "add_368" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + 
value { + b: true + } + } +} +node { + name: "Assign_525" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/bias/adam_v" + input: "add_369" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_591/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_591" + op: "Mul" + input: "Mul_591/x" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_592/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_592" + op: "Mul" + input: "Mul_592/x" + input: "clip_by_global_norm/clip_by_global_norm/_109" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_371" + op: "Add" + input: "Mul_591" + input: "Mul_592" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_593/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_593" + op: "Mul" + input: "Mul_593/x" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim 
{ + size: 768 + } + } + } + } + } +} +node { + name: "Square_109" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_109" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_594/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_594" + op: "Mul" + input: "Mul_594/x" + input: "Square_109" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_372" + op: "Add" + input: "Mul_593" + input: "Mul_594" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_109" + op: "Sqrt" + input: "add_372" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_373/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_373" + op: "Add" + input: "Sqrt_109" + input: "add_373/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_110" + op: "RealDiv" + input: "add_371" + input: "add_373" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_595" + op: "Mul" + input: "add" + input: "truediv_110" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_110" + op: "Sub" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/read" + input: "mul_595" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_526" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta" + input: "sub_110" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_527" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + input: "add_371" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } 
+ } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_528" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + input: "add_372" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: 
"bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_596/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_596" + op: "Mul" + input: "Mul_596/x" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_597/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_597" + op: "Mul" + input: "Mul_597/x" + input: "clip_by_global_norm/clip_by_global_norm/_110" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_374" + op: "Add" + input: "Mul_596" + input: "Mul_597" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_598/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_598" + op: "Mul" + input: "Mul_598/x" + input: 
"bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_110" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_110" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_599/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_599" + op: "Mul" + input: "Mul_599/x" + input: "Square_110" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_375" + op: "Add" + input: "Mul_598" + input: "Mul_599" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_110" + op: "Sqrt" + input: "add_375" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_376/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_376" + op: "Add" + input: "Sqrt_110" + input: "add_376/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_111" + op: "RealDiv" + input: "add_374" + input: "add_376" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_600" + op: "Mul" + input: "add" + input: "truediv_111" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_111" + op: "Sub" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/read" + input: "mul_600" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_529" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma" + input: "sub_111" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_530" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + input: "add_374" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: 
"_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_531" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + input: "add_375" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + 
list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + } + } 
+ } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_601/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_601" + op: "Mul" + input: "Mul_601/x" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_602/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_602" + op: "Mul" + input: "Mul_602/x" + input: "clip_by_global_norm/clip_by_global_norm/_111" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_377" + op: "Add" + input: "Mul_601" + input: "Mul_602" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_603/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_603" + op: "Mul" + input: "Mul_603/x" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_111" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_111" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_604/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_604" + op: "Mul" + input: "Mul_604/x" + input: "Square_111" + attr { + key: "T" + value { + 
type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_378" + op: "Add" + input: "Mul_603" + input: "Mul_604" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_111" + op: "Sqrt" + input: "add_378" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_379/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_379" + op: "Add" + input: "Sqrt_111" + input: "add_379/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_112" + op: "RealDiv" + input: "add_377" + input: "add_379" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_605/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_605" + op: "Mul" + input: "mul_605/x" + input: "bert/encoder/layer_6/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_380" + op: "Add" + input: "truediv_112" + input: "mul_605" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_606" + op: "Mul" + input: "add" + input: "add_380" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_112" + op: "Sub" + input: "bert/encoder/layer_6/intermediate/dense/kernel/read" + input: "mul_606" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_532" + op: "Assign" + input: "bert/encoder/layer_6/intermediate/dense/kernel" + input: "sub_112" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_533" + op: "Assign" + input: 
"bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + input: "add_377" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_534" + op: "Assign" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + input: "add_378" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/bias/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } 
+ } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/bias/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: 
true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_607/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_607" + op: "Mul" + input: "Mul_607/x" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_608/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_608" + op: "Mul" + input: "Mul_608/x" + input: "clip_by_global_norm/clip_by_global_norm/_112" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_381" + op: "Add" + input: "Mul_607" + input: "Mul_608" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_609/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_609" + op: "Mul" + input: "Mul_609/x" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_112" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_112" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_610/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_610" + op: "Mul" + input: "Mul_610/x" + input: "Square_112" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_382" + op: "Add" + input: "Mul_609" + input: "Mul_610" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + 
size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_112" + op: "Sqrt" + input: "add_382" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_383/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_383" + op: "Add" + input: "Sqrt_112" + input: "add_383/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_113" + op: "RealDiv" + input: "add_381" + input: "add_383" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_611" + op: "Mul" + input: "add" + input: "truediv_113" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_113" + op: "Sub" + input: "bert/encoder/layer_6/intermediate/dense/bias/read" + input: "mul_611" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_535" + op: "Assign" + input: "bert/encoder/layer_6/intermediate/dense/bias" + input: "sub_113" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_536" + op: "Assign" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_m" + input: "add_381" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_537" + op: "Assign" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_v" + input: "add_382" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + 
value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_6/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_6/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/kernel/adam_m" + input: "bert/encoder/layer_6/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_6/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: 
"bert/encoder/layer_6/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_6/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_6/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/kernel/adam_v" + input: "bert/encoder/layer_6/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_6/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_612/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_612" + op: "Mul" + input: "Mul_612/x" + input: "bert/encoder/layer_6/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_613/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" 
+ value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_613" + op: "Mul" + input: "Mul_613/x" + input: "clip_by_global_norm/clip_by_global_norm/_113" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_384" + op: "Add" + input: "Mul_612" + input: "Mul_613" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_614/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_614" + op: "Mul" + input: "Mul_614/x" + input: "bert/encoder/layer_6/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_113" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_113" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_615/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_615" + op: "Mul" + input: "Mul_615/x" + input: "Square_113" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_385" + op: "Add" + input: "Mul_614" + input: "Mul_615" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_113" + op: "Sqrt" + input: "add_385" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_386/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_386" + op: "Add" + input: "Sqrt_113" + input: "add_386/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_114" + op: "RealDiv" + input: "add_384" + input: "add_386" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } 
+ } +} +node { + name: "mul_616/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_616" + op: "Mul" + input: "mul_616/x" + input: "bert/encoder/layer_6/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_387" + op: "Add" + input: "truediv_114" + input: "mul_616" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_617" + op: "Mul" + input: "add" + input: "add_387" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_114" + op: "Sub" + input: "bert/encoder/layer_6/output/dense/kernel/read" + input: "mul_617" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_538" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/kernel" + input: "sub_114" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_539" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/kernel/adam_m" + input: "add_384" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_540" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/kernel/adam_v" + input: "add_385" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + 
} + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/bias/adam_m" + input: "bert/encoder/layer_6/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_6/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/bias/adam_v" + input: "bert/encoder/layer_6/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_6/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_6/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_618/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_618" + op: "Mul" + input: "Mul_618/x" + input: "bert/encoder/layer_6/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_619/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_619" + op: "Mul" + input: "Mul_619/x" + input: "clip_by_global_norm/clip_by_global_norm/_114" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_388" + op: "Add" + input: "Mul_618" + input: "Mul_619" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_620/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_620" + op: "Mul" + input: "Mul_620/x" + input: "bert/encoder/layer_6/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_114" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_114" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_621/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_621" + op: "Mul" + input: "Mul_621/x" + input: "Square_114" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_389" + op: "Add" + input: "Mul_620" + input: "Mul_621" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_114" + op: "Sqrt" + input: "add_389" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_390/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } 
+ } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_390" + op: "Add" + input: "Sqrt_114" + input: "add_390/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_115" + op: "RealDiv" + input: "add_388" + input: "add_390" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_622" + op: "Mul" + input: "add" + input: "truediv_115" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_115" + op: "Sub" + input: "bert/encoder/layer_6/output/dense/bias/read" + input: "mul_622" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_541" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/bias" + input: "sub_115" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_542" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/bias/adam_m" + input: "add_388" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_543" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/bias/adam_v" + input: "add_389" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { 
+ key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_623/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + 
attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_623" + op: "Mul" + input: "Mul_623/x" + input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_624/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_624" + op: "Mul" + input: "Mul_624/x" + input: "clip_by_global_norm/clip_by_global_norm/_115" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_391" + op: "Add" + input: "Mul_623" + input: "Mul_624" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_625/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_625" + op: "Mul" + input: "Mul_625/x" + input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_115" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_115" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_626/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_626" + op: "Mul" + input: "Mul_626/x" + input: "Square_115" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_392" + op: "Add" + input: "Mul_625" + input: "Mul_626" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_115" + op: "Sqrt" + input: "add_392" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_393/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_393" + op: "Add" + input: "Sqrt_115" + input: "add_393/y" + 
attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_116" + op: "RealDiv" + input: "add_391" + input: "add_393" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_627" + op: "Mul" + input: "add" + input: "truediv_116" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_116" + op: "Sub" + input: "bert/encoder/layer_6/output/LayerNorm/beta/read" + input: "mul_627" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_544" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/beta" + input: "sub_116" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_545" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + input: "add_391" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_546" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + input: "add_392" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node 
{ + name: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_628/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_628" + op: "Mul" + 
input: "Mul_628/x" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_629/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_629" + op: "Mul" + input: "Mul_629/x" + input: "clip_by_global_norm/clip_by_global_norm/_116" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_394" + op: "Add" + input: "Mul_628" + input: "Mul_629" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_630/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_630" + op: "Mul" + input: "Mul_630/x" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_116" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_116" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_631/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_631" + op: "Mul" + input: "Mul_631/x" + input: "Square_116" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_395" + op: "Add" + input: "Mul_630" + input: "Mul_631" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_116" + op: "Sqrt" + input: "add_395" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_396/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_396" + op: "Add" + input: "Sqrt_116" + input: "add_396/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_117" + op: "RealDiv" + input: "add_394" + 
input: "add_396" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_632" + op: "Mul" + input: "add" + input: "truediv_117" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_117" + op: "Sub" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/read" + input: "mul_632" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_547" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/gamma" + input: "sub_117" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_548" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + input: "add_394" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_549" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + input: "add_395" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: 
"bert/encoder/layer_7/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: 
"bert/encoder/layer_7/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_633/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_633" + op: "Mul" + input: "Mul_633/x" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_634/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_634" + op: "Mul" + input: "Mul_634/x" + input: "clip_by_global_norm/clip_by_global_norm/_117" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_397" + op: "Add" + input: "Mul_633" + input: "Mul_634" + attr { + key: "T" + 
value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_635/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_635" + op: "Mul" + input: "Mul_635/x" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_117" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_117" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_636/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_636" + op: "Mul" + input: "Mul_636/x" + input: "Square_117" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_398" + op: "Add" + input: "Mul_635" + input: "Mul_636" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_117" + op: "Sqrt" + input: "add_398" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_399/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_399" + op: "Add" + input: "Sqrt_117" + input: "add_399/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_118" + op: "RealDiv" + input: "add_397" + input: "add_399" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_637/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_637" + op: "Mul" + input: "mul_637/x" + input: "bert/encoder/layer_7/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim 
{ + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_400" + op: "Add" + input: "truediv_118" + input: "mul_637" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_638" + op: "Mul" + input: "add" + input: "add_400" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_118" + op: "Sub" + input: "bert/encoder/layer_7/attention/self/query/kernel/read" + input: "mul_638" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_550" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/kernel" + input: "sub_118" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_551" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_m" + input: "add_397" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_552" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_v" + input: "add_398" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { 
+ size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_7/attention/self/query/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/self/query/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_7/attention/self/query/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/self/query/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_639/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + 
value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_639" + op: "Mul" + input: "Mul_639/x" + input: "bert/encoder/layer_7/attention/self/query/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_640/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_640" + op: "Mul" + input: "Mul_640/x" + input: "clip_by_global_norm/clip_by_global_norm/_118" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_401" + op: "Add" + input: "Mul_639" + input: "Mul_640" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_641/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_641" + op: "Mul" + input: "Mul_641/x" + input: "bert/encoder/layer_7/attention/self/query/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_118" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_118" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_642/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_642" + op: "Mul" + input: "Mul_642/x" + input: "Square_118" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_402" + op: "Add" + input: "Mul_641" + input: "Mul_642" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_118" + op: "Sqrt" + input: "add_402" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_403/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_403" + op: "Add" + input: "Sqrt_118" + input: "add_403/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_119" + op: "RealDiv" + input: "add_401" + input: "add_403" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_643" + op: "Mul" + input: "add" + input: "truediv_119" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_119" + op: "Sub" + input: "bert/encoder/layer_7/attention/self/query/bias/read" + input: "mul_643" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_553" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/bias" + input: "sub_119" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_554" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/bias/adam_m" + input: "add_401" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_555" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/bias/adam_v" + input: "add_402" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: 
"bert/encoder/layer_7/attention/self/key/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel/adam_v/Initializer/zeros" + op: "Fill" 
+ input: "bert/encoder/layer_7/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_644/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_644" + op: "Mul" + input: "Mul_644/x" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_645/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_645" + op: "Mul" + input: "Mul_645/x" + input: "clip_by_global_norm/clip_by_global_norm/_119" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: 
"add_404" + op: "Add" + input: "Mul_644" + input: "Mul_645" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_646/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_646" + op: "Mul" + input: "Mul_646/x" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_119" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_119" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_647/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_647" + op: "Mul" + input: "Mul_647/x" + input: "Square_119" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_405" + op: "Add" + input: "Mul_646" + input: "Mul_647" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_119" + op: "Sqrt" + input: "add_405" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_406/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_406" + op: "Add" + input: "Sqrt_119" + input: "add_406/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_120" + op: "RealDiv" + input: "add_404" + input: "add_406" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_648/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_648" + op: "Mul" + input: "mul_648/x" + input: "bert/encoder/layer_7/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: 
DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_407" + op: "Add" + input: "truediv_120" + input: "mul_648" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_649" + op: "Mul" + input: "add" + input: "add_407" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_120" + op: "Sub" + input: "bert/encoder/layer_7/attention/self/key/kernel/read" + input: "mul_649" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_556" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/kernel" + input: "sub_120" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_557" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_m" + input: "add_404" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_558" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_v" + input: "add_405" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: 
DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_7/attention/self/key/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/self/key/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_7/attention/self/key/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/self/key/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_650/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + 
} + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_650" + op: "Mul" + input: "Mul_650/x" + input: "bert/encoder/layer_7/attention/self/key/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_651/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_651" + op: "Mul" + input: "Mul_651/x" + input: "clip_by_global_norm/clip_by_global_norm/_120" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_408" + op: "Add" + input: "Mul_650" + input: "Mul_651" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_652/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_652" + op: "Mul" + input: "Mul_652/x" + input: "bert/encoder/layer_7/attention/self/key/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_120" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_120" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_653/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_653" + op: "Mul" + input: "Mul_653/x" + input: "Square_120" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_409" + op: "Add" + input: "Mul_652" + input: "Mul_653" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_120" + op: "Sqrt" + input: "add_409" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_410/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_410" + op: "Add" + input: "Sqrt_120" + input: "add_410/y" + attr { + key: "T" + value { + type: DT_FLOAT + } 
+ } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_121" + op: "RealDiv" + input: "add_408" + input: "add_410" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_654" + op: "Mul" + input: "add" + input: "truediv_121" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_121" + op: "Sub" + input: "bert/encoder/layer_7/attention/self/key/bias/read" + input: "mul_654" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_559" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/bias" + input: "sub_121" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_560" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/bias/adam_m" + input: "add_408" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_561" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/bias/adam_v" + input: "add_409" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: 
"bert/encoder/layer_7/attention/self/value/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: 
"bert/encoder/layer_7/attention/self/value/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_655/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_655" + op: "Mul" + input: "Mul_655/x" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_656/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_656" + op: "Mul" + input: "Mul_656/x" + input: "clip_by_global_norm/clip_by_global_norm/_121" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_411" + op: "Add" + input: "Mul_655" + input: "Mul_656" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_657/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_657" + op: "Mul" + input: "Mul_657/x" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_121" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_121" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_658/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_658" + op: "Mul" + input: "Mul_658/x" + input: "Square_121" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_412" + op: "Add" + input: "Mul_657" + input: "Mul_658" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_121" + op: "Sqrt" + input: "add_412" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_413/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_413" + op: "Add" + input: "Sqrt_121" + input: "add_413/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_122" + op: "RealDiv" + input: "add_411" + input: "add_413" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_659/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_659" + op: 
"Mul" + input: "mul_659/x" + input: "bert/encoder/layer_7/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_414" + op: "Add" + input: "truediv_122" + input: "mul_659" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_660" + op: "Mul" + input: "add" + input: "add_414" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_122" + op: "Sub" + input: "bert/encoder/layer_7/attention/self/value/kernel/read" + input: "mul_660" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_562" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/kernel" + input: "sub_122" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_563" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_m" + input: "add_411" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_564" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_v" + input: "add_412" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { 
+ shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_7/attention/self/value/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/self/value/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_7/attention/self/value/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/self/value/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + 
} + } +} +node { + name: "Mul_661/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_661" + op: "Mul" + input: "Mul_661/x" + input: "bert/encoder/layer_7/attention/self/value/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_662/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_662" + op: "Mul" + input: "Mul_662/x" + input: "clip_by_global_norm/clip_by_global_norm/_122" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_415" + op: "Add" + input: "Mul_661" + input: "Mul_662" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_663/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_663" + op: "Mul" + input: "Mul_663/x" + input: "bert/encoder/layer_7/attention/self/value/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_122" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_122" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_664/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_664" + op: "Mul" + input: "Mul_664/x" + input: "Square_122" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_416" + op: "Add" + input: "Mul_663" + input: "Mul_664" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_122" + op: "Sqrt" + input: "add_416" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_417/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape 
{ + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_417" + op: "Add" + input: "Sqrt_122" + input: "add_417/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_123" + op: "RealDiv" + input: "add_415" + input: "add_417" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_665" + op: "Mul" + input: "add" + input: "truediv_123" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_123" + op: "Sub" + input: "bert/encoder/layer_7/attention/self/value/bias/read" + input: "mul_665" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_565" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/bias" + input: "sub_123" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_566" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/bias/adam_m" + input: "add_415" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_567" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/bias/adam_v" + input: "add_416" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: 
"dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { 
+ type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_666/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_666" + op: "Mul" + input: "Mul_666/x" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_667/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: 
"Mul_667" + op: "Mul" + input: "Mul_667/x" + input: "clip_by_global_norm/clip_by_global_norm/_123" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_418" + op: "Add" + input: "Mul_666" + input: "Mul_667" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_668/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_668" + op: "Mul" + input: "Mul_668/x" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_123" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_123" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_669/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_669" + op: "Mul" + input: "Mul_669/x" + input: "Square_123" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_419" + op: "Add" + input: "Mul_668" + input: "Mul_669" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_123" + op: "Sqrt" + input: "add_419" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_420/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_420" + op: "Add" + input: "Sqrt_123" + input: "add_420/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_124" + op: "RealDiv" + input: "add_418" + input: "add_420" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_670/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } 
+ } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_670" + op: "Mul" + input: "mul_670/x" + input: "bert/encoder/layer_7/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_421" + op: "Add" + input: "truediv_124" + input: "mul_670" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_671" + op: "Mul" + input: "add" + input: "add_421" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_124" + op: "Sub" + input: "bert/encoder/layer_7/attention/output/dense/kernel/read" + input: "mul_671" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_568" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/kernel" + input: "sub_124" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_569" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + input: "add_418" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_570" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + input: "add_419" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/bias/adam_m" + 
op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_7/attention/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_7/attention/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: 
DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_672/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_672" + op: "Mul" + input: "Mul_672/x" + input: "bert/encoder/layer_7/attention/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_673/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_673" + op: "Mul" + input: "Mul_673/x" + input: "clip_by_global_norm/clip_by_global_norm/_124" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_422" + op: "Add" + input: "Mul_672" + input: "Mul_673" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_674/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_674" + op: "Mul" + input: "Mul_674/x" + input: "bert/encoder/layer_7/attention/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_124" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_124" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_675/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_675" + op: "Mul" + input: "Mul_675/x" + input: "Square_124" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_423" + op: "Add" + input: "Mul_674" + input: "Mul_675" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_124" + op: "Sqrt" + input: "add_423" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: 
"add_424/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_424" + op: "Add" + input: "Sqrt_124" + input: "add_424/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_125" + op: "RealDiv" + input: "add_422" + input: "add_424" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_676" + op: "Mul" + input: "add" + input: "truediv_125" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_125" + op: "Sub" + input: "bert/encoder/layer_7/attention/output/dense/bias/read" + input: "mul_676" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_571" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/bias" + input: "sub_125" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_572" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/bias/adam_m" + input: "add_422" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_573" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/bias/adam_v" + input: "add_423" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value 
{ + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + attr { + key: "T" + 
value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_677/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_677" + op: "Mul" + input: "Mul_677/x" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_678/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_678" + op: "Mul" + input: "Mul_678/x" + input: "clip_by_global_norm/clip_by_global_norm/_125" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_425" + op: "Add" + input: "Mul_677" + input: "Mul_678" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_679/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_679" + op: "Mul" + input: "Mul_679/x" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_125" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_125" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_680/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_680" + op: "Mul" + input: "Mul_680/x" + input: "Square_125" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_426" + op: "Add" + input: "Mul_679" + input: "Mul_680" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_125" + op: "Sqrt" + input: "add_426" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + 
} + } + } +} +node { + name: "add_427/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_427" + op: "Add" + input: "Sqrt_125" + input: "add_427/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_126" + op: "RealDiv" + input: "add_425" + input: "add_427" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_681" + op: "Mul" + input: "add" + input: "truediv_126" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_126" + op: "Sub" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/read" + input: "mul_681" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_574" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta" + input: "sub_126" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_575" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + input: "add_425" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_576" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + input: "add_426" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: 
"bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: 
"bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_682/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_682" + op: "Mul" + input: "Mul_682/x" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_683/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_683" + op: "Mul" + input: "Mul_683/x" + input: "clip_by_global_norm/clip_by_global_norm/_126" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_428" + op: "Add" + input: "Mul_682" + input: "Mul_683" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_684/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_684" + op: "Mul" + input: "Mul_684/x" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_126" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_126" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_685/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_685" + op: "Mul" + input: "Mul_685/x" + input: "Square_126" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_429" + op: "Add" + input: "Mul_684" + input: "Mul_685" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: 
"Sqrt_126" + op: "Sqrt" + input: "add_429" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_430/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_430" + op: "Add" + input: "Sqrt_126" + input: "add_430/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_127" + op: "RealDiv" + input: "add_428" + input: "add_430" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_686" + op: "Mul" + input: "add" + input: "truediv_127" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_127" + op: "Sub" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/read" + input: "mul_686" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_577" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma" + input: "sub_127" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_578" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + input: "add_428" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_579" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + input: "add_429" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + 
key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + 
size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_687/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_687" + op: "Mul" + input: "Mul_687/x" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 
768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_688/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_688" + op: "Mul" + input: "Mul_688/x" + input: "clip_by_global_norm/clip_by_global_norm/_127" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_431" + op: "Add" + input: "Mul_687" + input: "Mul_688" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_689/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_689" + op: "Mul" + input: "Mul_689/x" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_127" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_127" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_690/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_690" + op: "Mul" + input: "Mul_690/x" + input: "Square_127" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_432" + op: "Add" + input: "Mul_689" + input: "Mul_690" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_127" + op: "Sqrt" + input: "add_432" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_433/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_433" + op: "Add" + input: "Sqrt_127" + input: "add_433/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_128" + op: "RealDiv" + input: 
"add_431" + input: "add_433" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_691/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_691" + op: "Mul" + input: "mul_691/x" + input: "bert/encoder/layer_7/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_434" + op: "Add" + input: "truediv_128" + input: "mul_691" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_692" + op: "Mul" + input: "add" + input: "add_434" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_128" + op: "Sub" + input: "bert/encoder/layer_7/intermediate/dense/kernel/read" + input: "mul_692" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_580" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/kernel" + input: "sub_128" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_581" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + input: "add_431" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_582" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + input: "add_432" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_m" + } + } + } + 
attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/bias/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + 
int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/bias/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_693/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_693" + op: "Mul" + input: "Mul_693/x" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_694/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } 
+ attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_694" + op: "Mul" + input: "Mul_694/x" + input: "clip_by_global_norm/clip_by_global_norm/_128" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_435" + op: "Add" + input: "Mul_693" + input: "Mul_694" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_695/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_695" + op: "Mul" + input: "Mul_695/x" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_128" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_128" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_696/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_696" + op: "Mul" + input: "Mul_696/x" + input: "Square_128" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_436" + op: "Add" + input: "Mul_695" + input: "Mul_696" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_128" + op: "Sqrt" + input: "add_436" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_437/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_437" + op: "Add" + input: "Sqrt_128" + input: "add_437/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_129" + op: "RealDiv" + input: "add_435" + input: "add_437" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_697" + op: "Mul" + input: "add" + input: "truediv_129" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} 
+node { + name: "sub_129" + op: "Sub" + input: "bert/encoder/layer_7/intermediate/dense/bias/read" + input: "mul_697" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_583" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/bias" + input: "sub_129" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_584" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_m" + input: "add_435" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_585" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_v" + input: "add_436" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_7/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_7/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: 
"bert/encoder/layer_7/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/kernel/adam_m" + input: "bert/encoder/layer_7/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_7/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_7/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_7/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_v" + } + } + 
} + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/kernel/adam_v" + input: "bert/encoder/layer_7/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_7/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_698/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_698" + op: "Mul" + input: "Mul_698/x" + input: "bert/encoder/layer_7/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_699/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_699" + op: "Mul" + input: "Mul_699/x" + input: "clip_by_global_norm/clip_by_global_norm/_129" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_438" + op: "Add" + input: "Mul_698" + input: "Mul_699" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_700/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_700" + op: "Mul" + input: "Mul_700/x" + input: "bert/encoder/layer_7/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 
3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_129" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_129" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_701/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_701" + op: "Mul" + input: "Mul_701/x" + input: "Square_129" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_439" + op: "Add" + input: "Mul_700" + input: "Mul_701" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_129" + op: "Sqrt" + input: "add_439" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_440/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_440" + op: "Add" + input: "Sqrt_129" + input: "add_440/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_130" + op: "RealDiv" + input: "add_438" + input: "add_440" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_702/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_702" + op: "Mul" + input: "mul_702/x" + input: "bert/encoder/layer_7/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_441" + op: "Add" + input: "truediv_130" + input: "mul_702" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_703" + op: "Mul" + input: "add" + input: "add_441" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_130" + op: "Sub" + input: "bert/encoder/layer_7/output/dense/kernel/read" + input: "mul_703" + attr { + key: "T" + value { + type: 
DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_586" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/kernel" + input: "sub_130" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_587" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/kernel/adam_m" + input: "add_438" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_588" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/kernel/adam_v" + input: "add_439" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/bias/adam_m" + input: "bert/encoder/layer_7/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/bias/adam_m/read" + op: "Identity" + input: 
"bert/encoder/layer_7/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/bias/adam_v" + input: "bert/encoder/layer_7/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_7/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_704/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_704" + op: "Mul" + input: "Mul_704/x" + input: "bert/encoder/layer_7/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_705/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_705" + op: "Mul" + input: "Mul_705/x" + input: "clip_by_global_norm/clip_by_global_norm/_130" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { 
+ size: 768 + } + } + } + } + } +} +node { + name: "add_442" + op: "Add" + input: "Mul_704" + input: "Mul_705" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_706/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_706" + op: "Mul" + input: "Mul_706/x" + input: "bert/encoder/layer_7/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_130" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_130" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_707/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_707" + op: "Mul" + input: "Mul_707/x" + input: "Square_130" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_443" + op: "Add" + input: "Mul_706" + input: "Mul_707" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_130" + op: "Sqrt" + input: "add_443" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_444/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_444" + op: "Add" + input: "Sqrt_130" + input: "add_444/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_131" + op: "RealDiv" + input: "add_442" + input: "add_444" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_708" + op: "Mul" + input: "add" + input: "truediv_131" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_131" + op: "Sub" + input: "bert/encoder/layer_7/output/dense/bias/read" + input: "mul_708" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_589" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/bias" + input: "sub_131" + attr 
{ + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_590" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/bias/adam_m" + input: "add_442" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_591" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/bias/adam_v" + input: "add_443" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + 
name: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_709/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_709" + op: "Mul" + input: "Mul_709/x" + input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_710/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_710" + op: "Mul" + input: "Mul_710/x" + input: "clip_by_global_norm/clip_by_global_norm/_131" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_445" + op: "Add" + input: "Mul_709" + input: "Mul_710" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + 
name: "Mul_711/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_711" + op: "Mul" + input: "Mul_711/x" + input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_131" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_131" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_712/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_712" + op: "Mul" + input: "Mul_712/x" + input: "Square_131" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_446" + op: "Add" + input: "Mul_711" + input: "Mul_712" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_131" + op: "Sqrt" + input: "add_446" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_447/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_447" + op: "Add" + input: "Sqrt_131" + input: "add_447/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_132" + op: "RealDiv" + input: "add_445" + input: "add_447" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_713" + op: "Mul" + input: "add" + input: "truediv_132" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_132" + op: "Sub" + input: "bert/encoder/layer_7/output/LayerNorm/beta/read" + input: "mul_713" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_592" + op: "Assign" + input: "bert/encoder/layer_7/output/LayerNorm/beta" + input: "sub_132" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr 
{ + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_593" + op: "Assign" + input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m" + input: "add_445" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_594" + op: "Assign" + input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + input: "add_446" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_714/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_714" + op: "Mul" + input: "Mul_714/x" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_715/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_715" + op: "Mul" + input: "Mul_715/x" + input: "clip_by_global_norm/clip_by_global_norm/_132" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_448" + op: "Add" + input: "Mul_714" + input: "Mul_715" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_716/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + 
dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_716" + op: "Mul" + input: "Mul_716/x" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_132" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_132" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_717/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_717" + op: "Mul" + input: "Mul_717/x" + input: "Square_132" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_449" + op: "Add" + input: "Mul_716" + input: "Mul_717" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_132" + op: "Sqrt" + input: "add_449" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_450/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_450" + op: "Add" + input: "Sqrt_132" + input: "add_450/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_133" + op: "RealDiv" + input: "add_448" + input: "add_450" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_718" + op: "Mul" + input: "add" + input: "truediv_133" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_133" + op: "Sub" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/read" + input: "mul_718" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_595" + op: "Assign" + input: "bert/encoder/layer_7/output/LayerNorm/gamma" + input: "sub_133" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_596" + op: "Assign" + input: 
"bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + input: "add_448" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_597" + op: "Assign" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + input: "add_449" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_m" + input: 
"bert/encoder/layer_8/attention/self/query/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_v" + input: 
"bert/encoder/layer_8/attention/self/query/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_719/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_719" + op: "Mul" + input: "Mul_719/x" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_720/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_720" + op: "Mul" + input: "Mul_720/x" + input: "clip_by_global_norm/clip_by_global_norm/_133" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_451" + op: "Add" + input: "Mul_719" + input: "Mul_720" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_721/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_721" + op: "Mul" + input: "Mul_721/x" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_133" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_133" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_722/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + 
} + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_722" + op: "Mul" + input: "Mul_722/x" + input: "Square_133" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_452" + op: "Add" + input: "Mul_721" + input: "Mul_722" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_133" + op: "Sqrt" + input: "add_452" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_453/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_453" + op: "Add" + input: "Sqrt_133" + input: "add_453/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_134" + op: "RealDiv" + input: "add_451" + input: "add_453" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_723/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_723" + op: "Mul" + input: "mul_723/x" + input: "bert/encoder/layer_8/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_454" + op: "Add" + input: "truediv_134" + input: "mul_723" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_724" + op: "Mul" + input: "add" + input: "add_454" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_134" + op: "Sub" + input: "bert/encoder/layer_8/attention/self/query/kernel/read" + input: "mul_724" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_598" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/kernel" + input: "sub_134" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + 
dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_599" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_m" + input: "add_451" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_600" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_v" + input: "add_452" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_8/attention/self/query/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/self/query/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: 
"bert/encoder/layer_8/attention/self/query/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_8/attention/self/query/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/self/query/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_725/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_725" + op: "Mul" + input: "Mul_725/x" + input: "bert/encoder/layer_8/attention/self/query/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_726/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_726" + op: "Mul" + input: "Mul_726/x" + input: "clip_by_global_norm/clip_by_global_norm/_134" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_455" + op: "Add" + input: "Mul_725" + input: "Mul_726" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + 
size: 768 + } + } + } + } + } +} +node { + name: "Mul_727/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_727" + op: "Mul" + input: "Mul_727/x" + input: "bert/encoder/layer_8/attention/self/query/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_134" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_134" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_728/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_728" + op: "Mul" + input: "Mul_728/x" + input: "Square_134" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_456" + op: "Add" + input: "Mul_727" + input: "Mul_728" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_134" + op: "Sqrt" + input: "add_456" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_457/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_457" + op: "Add" + input: "Sqrt_134" + input: "add_457/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_135" + op: "RealDiv" + input: "add_455" + input: "add_457" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_729" + op: "Mul" + input: "add" + input: "truediv_135" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_135" + op: "Sub" + input: "bert/encoder/layer_8/attention/self/query/bias/read" + input: "mul_729" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_601" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/bias" + input: "sub_135" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list 
{ + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_602" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/bias/adam_m" + input: "add_455" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_603" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/bias/adam_v" + input: "add_456" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + 
s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: 
"bert/encoder/layer_8/attention/self/key/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_730/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_730" + op: "Mul" + input: "Mul_730/x" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_731/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_731" + op: "Mul" + input: "Mul_731/x" + input: "clip_by_global_norm/clip_by_global_norm/_135" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_458" + op: "Add" + input: "Mul_730" + input: "Mul_731" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_732/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_732" + op: "Mul" + input: "Mul_732/x" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_135" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_135" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_733/x" + op: 
"Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_733" + op: "Mul" + input: "Mul_733/x" + input: "Square_135" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_459" + op: "Add" + input: "Mul_732" + input: "Mul_733" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_135" + op: "Sqrt" + input: "add_459" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_460/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_460" + op: "Add" + input: "Sqrt_135" + input: "add_460/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_136" + op: "RealDiv" + input: "add_458" + input: "add_460" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_734/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_734" + op: "Mul" + input: "mul_734/x" + input: "bert/encoder/layer_8/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_461" + op: "Add" + input: "truediv_136" + input: "mul_734" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_735" + op: "Mul" + input: "add" + input: "add_461" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_136" + op: "Sub" + input: "bert/encoder/layer_8/attention/self/key/kernel/read" + input: "mul_735" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_604" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/kernel" + input: "sub_136" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + 
s: "loc:@bert/encoder/layer_8/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_605" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_m" + input: "add_458" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_606" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_v" + input: "add_459" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_8/attention/self/key/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/self/key/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 
768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_8/attention/self/key/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/self/key/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_736/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_736" + op: "Mul" + input: "Mul_736/x" + input: "bert/encoder/layer_8/attention/self/key/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_737/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_737" + op: "Mul" + input: "Mul_737/x" + input: "clip_by_global_norm/clip_by_global_norm/_136" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_462" + op: "Add" + input: "Mul_736" + input: "Mul_737" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + 
shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_738/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_738" + op: "Mul" + input: "Mul_738/x" + input: "bert/encoder/layer_8/attention/self/key/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_136" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_136" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_739/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_739" + op: "Mul" + input: "Mul_739/x" + input: "Square_136" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_463" + op: "Add" + input: "Mul_738" + input: "Mul_739" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_136" + op: "Sqrt" + input: "add_463" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_464/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_464" + op: "Add" + input: "Sqrt_136" + input: "add_464/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_137" + op: "RealDiv" + input: "add_462" + input: "add_464" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_740" + op: "Mul" + input: "add" + input: "truediv_137" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_137" + op: "Sub" + input: "bert/encoder/layer_8/attention/self/key/bias/read" + input: "mul_740" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_607" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/bias" + input: "sub_137" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value 
{ + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_608" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/bias/adam_m" + input: "add_462" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_609" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/bias/adam_v" + input: "add_463" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: 
"shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + 
name: "bert/encoder/layer_8/attention/self/value/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_741/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_741" + op: "Mul" + input: "Mul_741/x" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_742/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_742" + op: "Mul" + input: "Mul_742/x" + input: "clip_by_global_norm/clip_by_global_norm/_137" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_465" + op: "Add" + input: "Mul_741" + input: "Mul_742" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_743/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_743" + op: "Mul" + input: "Mul_743/x" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_137" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_137" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + 
name: "Mul_744/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_744" + op: "Mul" + input: "Mul_744/x" + input: "Square_137" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_466" + op: "Add" + input: "Mul_743" + input: "Mul_744" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_137" + op: "Sqrt" + input: "add_466" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_467/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_467" + op: "Add" + input: "Sqrt_137" + input: "add_467/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_138" + op: "RealDiv" + input: "add_465" + input: "add_467" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_745/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_745" + op: "Mul" + input: "mul_745/x" + input: "bert/encoder/layer_8/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_468" + op: "Add" + input: "truediv_138" + input: "mul_745" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_746" + op: "Mul" + input: "add" + input: "add_468" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_138" + op: "Sub" + input: "bert/encoder/layer_8/attention/self/value/kernel/read" + input: "mul_746" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_610" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/value/kernel" + input: "sub_138" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: 
"_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_611" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_m" + input: "add_465" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_612" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_v" + input: "add_466" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_8/attention/self/value/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/self/value/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_m" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_8/attention/self/value/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/self/value/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_747/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_747" + op: "Mul" + input: "Mul_747/x" + input: "bert/encoder/layer_8/attention/self/value/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_748/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_748" + op: "Mul" + input: "Mul_748/x" + input: "clip_by_global_norm/clip_by_global_norm/_138" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_469" + op: "Add" + input: "Mul_747" + input: "Mul_748" + attr { + key: "T" + 
value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_749/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_749" + op: "Mul" + input: "Mul_749/x" + input: "bert/encoder/layer_8/attention/self/value/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_138" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_138" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_750/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_750" + op: "Mul" + input: "Mul_750/x" + input: "Square_138" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_470" + op: "Add" + input: "Mul_749" + input: "Mul_750" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_138" + op: "Sqrt" + input: "add_470" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_471/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_471" + op: "Add" + input: "Sqrt_138" + input: "add_471/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_139" + op: "RealDiv" + input: "add_469" + input: "add_471" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_751" + op: "Mul" + input: "add" + input: "truediv_139" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_139" + op: "Sub" + input: "bert/encoder/layer_8/attention/self/value/bias/read" + input: "mul_751" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_613" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/value/bias" + input: "sub_139" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_8/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_614" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/value/bias/adam_m" + input: "add_469" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_615" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/value/bias/adam_v" + input: "add_470" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: 
DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + 
attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_752/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_752" + op: "Mul" + input: "Mul_752/x" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_753/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_753" + op: "Mul" + input: "Mul_753/x" + input: "clip_by_global_norm/clip_by_global_norm/_139" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_472" + op: "Add" + input: "Mul_752" + input: "Mul_753" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_754/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_754" + op: "Mul" + input: "Mul_754/x" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_139" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_139" + attr { 
+ key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_755/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_755" + op: "Mul" + input: "Mul_755/x" + input: "Square_139" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_473" + op: "Add" + input: "Mul_754" + input: "Mul_755" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_139" + op: "Sqrt" + input: "add_473" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_474/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_474" + op: "Add" + input: "Sqrt_139" + input: "add_474/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_140" + op: "RealDiv" + input: "add_472" + input: "add_474" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_756/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_756" + op: "Mul" + input: "mul_756/x" + input: "bert/encoder/layer_8/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_475" + op: "Add" + input: "truediv_140" + input: "mul_756" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_757" + op: "Mul" + input: "add" + input: "add_475" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_140" + op: "Sub" + input: "bert/encoder/layer_8/attention/output/dense/kernel/read" + input: "mul_757" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} 
+node { + name: "Assign_616" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/kernel" + input: "sub_140" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_617" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + input: "add_472" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_618" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + input: "add_473" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_8/attention/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/bias/adam_m/read" + op: "Identity" + input: 
"bert/encoder/layer_8/attention/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_8/attention/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_758/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_758" + op: "Mul" + input: "Mul_758/x" + input: "bert/encoder/layer_8/attention/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_759/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_759" + op: "Mul" + input: "Mul_759/x" + input: 
"clip_by_global_norm/clip_by_global_norm/_140" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_476" + op: "Add" + input: "Mul_758" + input: "Mul_759" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_760/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_760" + op: "Mul" + input: "Mul_760/x" + input: "bert/encoder/layer_8/attention/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_140" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_140" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_761/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_761" + op: "Mul" + input: "Mul_761/x" + input: "Square_140" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_477" + op: "Add" + input: "Mul_760" + input: "Mul_761" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_140" + op: "Sqrt" + input: "add_477" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_478/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_478" + op: "Add" + input: "Sqrt_140" + input: "add_478/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_141" + op: "RealDiv" + input: "add_476" + input: "add_478" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_762" + op: "Mul" + input: "add" + input: "truediv_141" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_141" + op: "Sub" + input: "bert/encoder/layer_8/attention/output/dense/bias/read" + input: "mul_762" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" 
+ value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_619" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/bias" + input: "sub_141" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_620" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/bias/adam_m" + input: "add_476" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_621" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/bias/adam_v" + input: "add_477" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: 
"bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_763/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_763" + op: "Mul" + input: "Mul_763/x" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_764/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_764" + op: "Mul" + 
input: "Mul_764/x" + input: "clip_by_global_norm/clip_by_global_norm/_141" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_479" + op: "Add" + input: "Mul_763" + input: "Mul_764" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_765/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_765" + op: "Mul" + input: "Mul_765/x" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_141" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_141" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_766/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_766" + op: "Mul" + input: "Mul_766/x" + input: "Square_141" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_480" + op: "Add" + input: "Mul_765" + input: "Mul_766" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_141" + op: "Sqrt" + input: "add_480" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_481/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_481" + op: "Add" + input: "Sqrt_141" + input: "add_481/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_142" + op: "RealDiv" + input: "add_479" + input: "add_481" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_767" + op: "Mul" + input: "add" + input: "truediv_142" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_142" + op: "Sub" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/read" + input: "mul_767" + attr { + key: "T" + value { + type: DT_FLOAT + } 
+ } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_622" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta" + input: "sub_142" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_623" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + input: "add_479" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_624" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + input: "add_480" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: 
"bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_768/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_768" + op: "Mul" + input: "Mul_768/x" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_769/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + 
tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_769" + op: "Mul" + input: "Mul_769/x" + input: "clip_by_global_norm/clip_by_global_norm/_142" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_482" + op: "Add" + input: "Mul_768" + input: "Mul_769" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_770/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_770" + op: "Mul" + input: "Mul_770/x" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_142" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_142" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_771/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_771" + op: "Mul" + input: "Mul_771/x" + input: "Square_142" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_483" + op: "Add" + input: "Mul_770" + input: "Mul_771" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_142" + op: "Sqrt" + input: "add_483" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_484/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_484" + op: "Add" + input: "Sqrt_142" + input: "add_484/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_143" + op: "RealDiv" + input: "add_482" + input: "add_484" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_772" + op: "Mul" + input: "add" + input: "truediv_143" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_143" + op: "Sub" + input: 
"bert/encoder/layer_8/attention/output/LayerNorm/gamma/read" + input: "mul_772" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_625" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma" + input: "sub_143" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_626" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + input: "add_482" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_627" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + input: "add_483" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + 
type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: 
"bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_773/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_773" + op: "Mul" + input: "Mul_773/x" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_774/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_774" + op: "Mul" + input: "Mul_774/x" + input: "clip_by_global_norm/clip_by_global_norm/_143" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_485" + op: "Add" + input: "Mul_773" + input: "Mul_774" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_775/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + 
name: "Mul_775" + op: "Mul" + input: "Mul_775/x" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_143" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_143" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_776/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_776" + op: "Mul" + input: "Mul_776/x" + input: "Square_143" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_486" + op: "Add" + input: "Mul_775" + input: "Mul_776" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_143" + op: "Sqrt" + input: "add_486" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_487/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_487" + op: "Add" + input: "Sqrt_143" + input: "add_487/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_144" + op: "RealDiv" + input: "add_485" + input: "add_487" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_777/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_777" + op: "Mul" + input: "mul_777/x" + input: "bert/encoder/layer_8/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_488" + op: "Add" + input: "truediv_144" + input: "mul_777" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_778" + op: "Mul" + input: "add" + input: "add_488" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_144" + op: "Sub" + input: "bert/encoder/layer_8/intermediate/dense/kernel/read" + input: "mul_778" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_628" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/kernel" + input: "sub_144" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_629" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + input: "add_485" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_630" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + input: "add_486" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/bias/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_m" + } + } + } + attr { 
+ key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/bias/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/bias/adam_v" + op: "VariableV2" + attr { + key: 
"_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_779/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_779" + op: "Mul" + input: "Mul_779/x" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_780/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_780" + op: "Mul" + input: "Mul_780/x" + input: "clip_by_global_norm/clip_by_global_norm/_144" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_489" + op: "Add" + input: "Mul_779" + input: "Mul_780" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_781/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_781" + op: "Mul" + input: "Mul_781/x" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} 
+node { + name: "Square_144" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_144" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_782/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_782" + op: "Mul" + input: "Mul_782/x" + input: "Square_144" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_490" + op: "Add" + input: "Mul_781" + input: "Mul_782" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_144" + op: "Sqrt" + input: "add_490" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_491/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_491" + op: "Add" + input: "Sqrt_144" + input: "add_491/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_145" + op: "RealDiv" + input: "add_489" + input: "add_491" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_783" + op: "Mul" + input: "add" + input: "truediv_145" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_145" + op: "Sub" + input: "bert/encoder/layer_8/intermediate/dense/bias/read" + input: "mul_783" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_631" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/bias" + input: "sub_145" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_632" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_m" + input: "add_489" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false 
+ } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_633" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_v" + input: "add_490" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_8/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_8/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_8/output/dense/kernel/adam_m" + input: "bert/encoder/layer_8/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: 
"bert/encoder/layer_8/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_8/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_8/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_8/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_8/output/dense/kernel/adam_v" + input: "bert/encoder/layer_8/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_8/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + 
} + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_784/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_784" + op: "Mul" + input: "Mul_784/x" + input: "bert/encoder/layer_8/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_785/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_785" + op: "Mul" + input: "Mul_785/x" + input: "clip_by_global_norm/clip_by_global_norm/_145" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_492" + op: "Add" + input: "Mul_784" + input: "Mul_785" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_786/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_786" + op: "Mul" + input: "Mul_786/x" + input: "bert/encoder/layer_8/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_145" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_145" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_787/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_787" + op: "Mul" + input: "Mul_787/x" + input: "Square_145" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_493" + op: "Add" + input: "Mul_786" + input: "Mul_787" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_145" + op: "Sqrt" + input: "add_493" + attr { + 
key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_494/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_494" + op: "Add" + input: "Sqrt_145" + input: "add_494/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_146" + op: "RealDiv" + input: "add_492" + input: "add_494" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_788/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_788" + op: "Mul" + input: "mul_788/x" + input: "bert/encoder/layer_8/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_495" + op: "Add" + input: "truediv_146" + input: "mul_788" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_789" + op: "Mul" + input: "add" + input: "add_495" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_146" + op: "Sub" + input: "bert/encoder/layer_8/output/dense/kernel/read" + input: "mul_789" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_634" + op: "Assign" + input: "bert/encoder/layer_8/output/dense/kernel" + input: "sub_146" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_635" + op: "Assign" + input: "bert/encoder/layer_8/output/dense/kernel/adam_m" + input: "add_492" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_636" 
+ op: "Assign" + input: "bert/encoder/layer_8/output/dense/kernel/adam_v" + input: "add_493" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_8/output/dense/bias/adam_m" + input: "bert/encoder/layer_8/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_8/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + 
value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_8/output/dense/bias/adam_v" + input: "bert/encoder/layer_8/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_8/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_790/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_790" + op: "Mul" + input: "Mul_790/x" + input: "bert/encoder/layer_8/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_791/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_791" + op: "Mul" + input: "Mul_791/x" + input: "clip_by_global_norm/clip_by_global_norm/_146" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_496" + op: "Add" + input: "Mul_790" + input: "Mul_791" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_792/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_792" + op: "Mul" + input: "Mul_792/x" + input: "bert/encoder/layer_8/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_146" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_146" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_793/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + 
value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_793" + op: "Mul" + input: "Mul_793/x" + input: "Square_146" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_497" + op: "Add" + input: "Mul_792" + input: "Mul_793" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_146" + op: "Sqrt" + input: "add_497" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_498/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_498" + op: "Add" + input: "Sqrt_146" + input: "add_498/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_147" + op: "RealDiv" + input: "add_496" + input: "add_498" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_794" + op: "Mul" + input: "add" + input: "truediv_147" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_147" + op: "Sub" + input: "bert/encoder/layer_8/output/dense/bias/read" + input: "mul_794" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_637" + op: "Assign" + input: "bert/encoder/layer_8/output/dense/bias" + input: "sub_147" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_638" + op: "Assign" + input: "bert/encoder/layer_8/output/dense/bias/adam_m" + input: "add_496" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_639" + op: "Assign" + input: "bert/encoder/layer_8/output/dense/bias/adam_v" + input: "add_497" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + 
} + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + input: 
"bert/encoder/layer_8/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_795/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_795" + op: "Mul" + input: "Mul_795/x" + input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_796/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_796" + op: "Mul" + input: "Mul_796/x" + input: "clip_by_global_norm/clip_by_global_norm/_147" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_499" + op: "Add" + input: "Mul_795" + input: "Mul_796" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_797/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_797" + op: "Mul" + input: "Mul_797/x" + input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_147" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_147" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_798/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_798" + op: "Mul" + input: "Mul_798/x" + input: 
"Square_147" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_500" + op: "Add" + input: "Mul_797" + input: "Mul_798" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_147" + op: "Sqrt" + input: "add_500" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_501/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_501" + op: "Add" + input: "Sqrt_147" + input: "add_501/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_148" + op: "RealDiv" + input: "add_499" + input: "add_501" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_799" + op: "Mul" + input: "add" + input: "truediv_148" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_148" + op: "Sub" + input: "bert/encoder/layer_8/output/LayerNorm/beta/read" + input: "mul_799" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_640" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/beta" + input: "sub_148" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_641" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + input: "add_499" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_642" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + input: "add_500" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: 
"bert/encoder/layer_8/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_800/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_800" + op: "Mul" + input: "Mul_800/x" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_801/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_801" + op: "Mul" + input: "Mul_801/x" + input: "clip_by_global_norm/clip_by_global_norm/_148" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_502" + op: "Add" + input: "Mul_800" + input: "Mul_801" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_802/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_802" + op: "Mul" + input: "Mul_802/x" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_148" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_148" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_803/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_803" + op: "Mul" + input: "Mul_803/x" + input: "Square_148" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} 
+node { + name: "add_503" + op: "Add" + input: "Mul_802" + input: "Mul_803" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_148" + op: "Sqrt" + input: "add_503" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_504/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_504" + op: "Add" + input: "Sqrt_148" + input: "add_504/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_149" + op: "RealDiv" + input: "add_502" + input: "add_504" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_804" + op: "Mul" + input: "add" + input: "truediv_149" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_149" + op: "Sub" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/read" + input: "mul_804" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_643" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/gamma" + input: "sub_149" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_644" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + input: "add_502" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_645" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + input: "add_503" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_805/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { 
+ dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_805" + op: "Mul" + input: "Mul_805/x" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_806/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_806" + op: "Mul" + input: "Mul_806/x" + input: "clip_by_global_norm/clip_by_global_norm/_149" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_505" + op: "Add" + input: "Mul_805" + input: "Mul_806" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_807/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_807" + op: "Mul" + input: "Mul_807/x" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_149" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_149" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_808/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_808" + op: "Mul" + input: "Mul_808/x" + input: "Square_149" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_506" + op: "Add" + input: "Mul_807" + input: "Mul_808" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_149" + op: "Sqrt" + input: "add_506" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_507/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 
9.999999974752427e-07 + } + } + } +} +node { + name: "add_507" + op: "Add" + input: "Sqrt_149" + input: "add_507/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_150" + op: "RealDiv" + input: "add_505" + input: "add_507" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_809/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_809" + op: "Mul" + input: "mul_809/x" + input: "bert/encoder/layer_9/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_508" + op: "Add" + input: "truediv_150" + input: "mul_809" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_810" + op: "Mul" + input: "add" + input: "add_508" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_150" + op: "Sub" + input: "bert/encoder/layer_9/attention/self/query/kernel/read" + input: "mul_810" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_646" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/kernel" + input: "sub_150" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_647" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_m" + input: "add_505" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_648" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_v" + input: "add_506" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + 
attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_9/attention/self/query/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/self/query/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/bias/adam_v" + 
input: "bert/encoder/layer_9/attention/self/query/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/self/query/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_811/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_811" + op: "Mul" + input: "Mul_811/x" + input: "bert/encoder/layer_9/attention/self/query/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_812/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_812" + op: "Mul" + input: "Mul_812/x" + input: "clip_by_global_norm/clip_by_global_norm/_150" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_509" + op: "Add" + input: "Mul_811" + input: "Mul_812" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_813/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_813" + op: "Mul" + input: "Mul_813/x" + input: "bert/encoder/layer_9/attention/self/query/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_150" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_150" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_814/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_814" + op: "Mul" 
+ input: "Mul_814/x" + input: "Square_150" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_510" + op: "Add" + input: "Mul_813" + input: "Mul_814" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_150" + op: "Sqrt" + input: "add_510" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_511/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_511" + op: "Add" + input: "Sqrt_150" + input: "add_511/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_151" + op: "RealDiv" + input: "add_509" + input: "add_511" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_815" + op: "Mul" + input: "add" + input: "truediv_151" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_151" + op: "Sub" + input: "bert/encoder/layer_9/attention/self/query/bias/read" + input: "mul_815" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_649" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/bias" + input: "sub_151" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_650" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/bias/adam_m" + input: "add_509" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_651" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/bias/adam_v" + input: "add_510" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { 
+ name: "bert/encoder/layer_9/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: 
"bert/encoder/layer_9/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_816/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape 
{ + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_816" + op: "Mul" + input: "Mul_816/x" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_817/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_817" + op: "Mul" + input: "Mul_817/x" + input: "clip_by_global_norm/clip_by_global_norm/_151" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_512" + op: "Add" + input: "Mul_816" + input: "Mul_817" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_818/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_818" + op: "Mul" + input: "Mul_818/x" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_151" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_151" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_819/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_819" + op: "Mul" + input: "Mul_819/x" + input: "Square_151" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_513" + op: "Add" + input: "Mul_818" + input: "Mul_819" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_151" + op: "Sqrt" + input: "add_513" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_514/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } 
+ } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_514" + op: "Add" + input: "Sqrt_151" + input: "add_514/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_152" + op: "RealDiv" + input: "add_512" + input: "add_514" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_820/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_820" + op: "Mul" + input: "mul_820/x" + input: "bert/encoder/layer_9/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_515" + op: "Add" + input: "truediv_152" + input: "mul_820" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_821" + op: "Mul" + input: "add" + input: "add_515" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_152" + op: "Sub" + input: "bert/encoder/layer_9/attention/self/key/kernel/read" + input: "mul_821" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_652" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/key/kernel" + input: "sub_152" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_653" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_m" + input: "add_512" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_654" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_v" + input: "add_513" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + 
list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_9/attention/self/key/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/self/key/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/bias/adam_v/Assign" + op: "Assign" + input: 
"bert/encoder/layer_9/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_9/attention/self/key/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/self/key/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_822/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_822" + op: "Mul" + input: "Mul_822/x" + input: "bert/encoder/layer_9/attention/self/key/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_823/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_823" + op: "Mul" + input: "Mul_823/x" + input: "clip_by_global_norm/clip_by_global_norm/_152" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_516" + op: "Add" + input: "Mul_822" + input: "Mul_823" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_824/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_824" + op: "Mul" + input: "Mul_824/x" + input: "bert/encoder/layer_9/attention/self/key/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_152" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_152" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_825/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } 
+} +node { + name: "Mul_825" + op: "Mul" + input: "Mul_825/x" + input: "Square_152" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_517" + op: "Add" + input: "Mul_824" + input: "Mul_825" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_152" + op: "Sqrt" + input: "add_517" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_518/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_518" + op: "Add" + input: "Sqrt_152" + input: "add_518/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_153" + op: "RealDiv" + input: "add_516" + input: "add_518" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_826" + op: "Mul" + input: "add" + input: "truediv_153" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_153" + op: "Sub" + input: "bert/encoder/layer_9/attention/self/key/bias/read" + input: "mul_826" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_655" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/key/bias" + input: "sub_153" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_656" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/key/bias/adam_m" + input: "add_516" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_657" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/key/bias/adam_v" + input: "add_517" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + 
b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: 
"bert/encoder/layer_9/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_827/x" + op: "Const" + attr { + key: 
"_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_827" + op: "Mul" + input: "Mul_827/x" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_828/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_828" + op: "Mul" + input: "Mul_828/x" + input: "clip_by_global_norm/clip_by_global_norm/_153" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_519" + op: "Add" + input: "Mul_827" + input: "Mul_828" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_829/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_829" + op: "Mul" + input: "Mul_829/x" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_153" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_153" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_830/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_830" + op: "Mul" + input: "Mul_830/x" + input: "Square_153" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_520" + op: "Add" + input: "Mul_829" + input: "Mul_830" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_153" + op: "Sqrt" + input: "add_520" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_521/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr 
{ + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_521" + op: "Add" + input: "Sqrt_153" + input: "add_521/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_154" + op: "RealDiv" + input: "add_519" + input: "add_521" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_831/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_831" + op: "Mul" + input: "mul_831/x" + input: "bert/encoder/layer_9/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_522" + op: "Add" + input: "truediv_154" + input: "mul_831" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_832" + op: "Mul" + input: "add" + input: "add_522" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_154" + op: "Sub" + input: "bert/encoder/layer_9/attention/self/value/kernel/read" + input: "mul_832" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_658" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/value/kernel" + input: "sub_154" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_659" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_m" + input: "add_519" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_660" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_v" + input: "add_520" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_9/attention/self/value/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/self/value/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: 
"shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_9/attention/self/value/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/self/value/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_833/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_833" + op: "Mul" + input: "Mul_833/x" + input: "bert/encoder/layer_9/attention/self/value/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_834/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_834" + op: "Mul" + input: "Mul_834/x" + input: "clip_by_global_norm/clip_by_global_norm/_154" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_523" + op: "Add" + input: "Mul_833" + input: "Mul_834" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_835/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_835" + op: "Mul" + input: "Mul_835/x" + input: "bert/encoder/layer_9/attention/self/value/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_154" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_154" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_836/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + 
value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_836" + op: "Mul" + input: "Mul_836/x" + input: "Square_154" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_524" + op: "Add" + input: "Mul_835" + input: "Mul_836" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_154" + op: "Sqrt" + input: "add_524" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_525/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_525" + op: "Add" + input: "Sqrt_154" + input: "add_525/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_155" + op: "RealDiv" + input: "add_523" + input: "add_525" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_837" + op: "Mul" + input: "add" + input: "truediv_155" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_155" + op: "Sub" + input: "bert/encoder/layer_9/attention/self/value/bias/read" + input: "mul_837" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_661" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/value/bias" + input: "sub_155" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_662" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/value/bias/adam_m" + input: "add_523" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_663" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/value/bias/adam_v" + input: "add_524" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_v" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_838/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_838" + op: "Mul" + input: "Mul_838/x" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_839/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_839" + op: "Mul" + input: "Mul_839/x" + input: "clip_by_global_norm/clip_by_global_norm/_155" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_526" + op: "Add" + input: "Mul_838" + input: "Mul_839" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_840/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_840" + op: "Mul" + input: "Mul_840/x" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_155" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_155" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_841/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_841" + op: "Mul" + input: "Mul_841/x" + input: "Square_155" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_527" + op: "Add" + input: "Mul_840" + input: "Mul_841" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_155" + op: "Sqrt" + input: "add_527" + attr { + key: "T" + value { + type: 
DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_528/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_528" + op: "Add" + input: "Sqrt_155" + input: "add_528/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_156" + op: "RealDiv" + input: "add_526" + input: "add_528" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_842/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_842" + op: "Mul" + input: "mul_842/x" + input: "bert/encoder/layer_9/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_529" + op: "Add" + input: "truediv_156" + input: "mul_842" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_843" + op: "Mul" + input: "add" + input: "add_529" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_156" + op: "Sub" + input: "bert/encoder/layer_9/attention/output/dense/kernel/read" + input: "mul_843" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_664" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/dense/kernel" + input: "sub_156" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_665" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + input: "add_526" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} 
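
The node pattern repeated throughout this block (the Mul_*/x constants 0.9, 0.1, 0.999 and 0.001, the Square/Sqrt/truediv chain, the 0.01 weight-decay multiply for kernels, and the trailing Assign_* trio) is the per-variable update that BERT's AdamWeightDecayOptimizer unrolls into the training graph: a first-moment moving average of the clipped gradient, a second moment of its square, division by sqrt(v) + 1e-6, an added 0.01 * param decay term for kernel matrices (skipped for biases and LayerNorm beta/gamma, whose truediv_* feeds mul_* directly), and a final subtraction scaled by the tensor named "add" (presumably the warmup-adjusted learning rate). A minimal sketch of the same arithmetic, assuming NumPy and an illustrative function name (adamw_step is not part of the repository):

    import numpy as np

    def adamw_step(param, grad, m, v, lr, apply_weight_decay=True):
        # First moment: Mul_* (0.9 * m) + Mul_* (0.1 * grad) -> add_*
        m = 0.9 * m + 0.1 * grad
        # Second moment: Mul_* (0.999 * v) + Mul_* (0.001 * grad**2) -> add_*
        v = 0.999 * v + 0.001 * grad ** 2
        # Sqrt_* / add_*/y / truediv_*: Adam ratio with epsilon = 1e-6
        update = m / (np.sqrt(v) + 1e-6)
        # mul_*/x = 0.01: decoupled weight decay, applied to kernels only
        if apply_weight_decay:
            update = update + 0.01 * param
        # mul_* ("add" is the learning-rate tensor), sub_*, then Assign_* writes
        # the new parameter value back into the variable
        param = param - lr * update
        return param, m, v

The intermediate sums (e.g. add_519/add_520 for the value kernel, add_526/add_527 for the output dense kernel above) are also written back into the adam_m / adam_v slot variables by the second and third Assign_* nodes of each trio, which is why every parameter in this file carries the extra VariableV2 / Initializer/zeros / read nodes.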
+node { + name: "Assign_666" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + input: "add_527" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_9/attention/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + 
list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_9/attention/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_844/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_844" + op: "Mul" + input: "Mul_844/x" + input: "bert/encoder/layer_9/attention/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_845/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_845" + op: "Mul" + input: "Mul_845/x" + input: "clip_by_global_norm/clip_by_global_norm/_156" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_530" + op: "Add" + input: "Mul_844" + input: "Mul_845" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_846/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_846" + op: "Mul" + input: "Mul_846/x" + input: "bert/encoder/layer_9/attention/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_156" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_156" + attr { + key: "T" + 
value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_847/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_847" + op: "Mul" + input: "Mul_847/x" + input: "Square_156" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_531" + op: "Add" + input: "Mul_846" + input: "Mul_847" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_156" + op: "Sqrt" + input: "add_531" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_532/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_532" + op: "Add" + input: "Sqrt_156" + input: "add_532/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_157" + op: "RealDiv" + input: "add_530" + input: "add_532" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_848" + op: "Mul" + input: "add" + input: "truediv_157" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_157" + op: "Sub" + input: "bert/encoder/layer_9/attention/output/dense/bias/read" + input: "mul_848" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_667" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/dense/bias" + input: "sub_157" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_668" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/dense/bias/adam_m" + input: "add_530" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_669" + op: 
"Assign" + input: "bert/encoder/layer_9/attention/output/dense/bias/adam_v" + input: "add_531" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + 
list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_849/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_849" + op: "Mul" + input: "Mul_849/x" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_850/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_850" + op: "Mul" + input: "Mul_850/x" + input: "clip_by_global_norm/clip_by_global_norm/_157" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_533" + op: "Add" + input: "Mul_849" + input: "Mul_850" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_851/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_851" + op: "Mul" + input: "Mul_851/x" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_157" + op: "Square" + input: 
"clip_by_global_norm/clip_by_global_norm/_157" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_852/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_852" + op: "Mul" + input: "Mul_852/x" + input: "Square_157" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_534" + op: "Add" + input: "Mul_851" + input: "Mul_852" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_157" + op: "Sqrt" + input: "add_534" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_535/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_535" + op: "Add" + input: "Sqrt_157" + input: "add_535/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_158" + op: "RealDiv" + input: "add_533" + input: "add_535" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_853" + op: "Mul" + input: "add" + input: "truediv_158" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_158" + op: "Sub" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/read" + input: "mul_853" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_670" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta" + input: "sub_158" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_671" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + input: "add_533" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + 
key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_672" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + input: "add_534" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_854/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_854" + op: "Mul" + input: "Mul_854/x" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_855/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_855" + op: "Mul" + input: "Mul_855/x" + input: "clip_by_global_norm/clip_by_global_norm/_158" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_536" + op: "Add" + input: "Mul_854" + input: "Mul_855" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_856/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_856" + op: "Mul" + input: "Mul_856/x" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + 
shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_158" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_158" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_857/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_857" + op: "Mul" + input: "Mul_857/x" + input: "Square_158" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_537" + op: "Add" + input: "Mul_856" + input: "Mul_857" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_158" + op: "Sqrt" + input: "add_537" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_538/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_538" + op: "Add" + input: "Sqrt_158" + input: "add_538/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_159" + op: "RealDiv" + input: "add_536" + input: "add_538" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_858" + op: "Mul" + input: "add" + input: "truediv_159" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_159" + op: "Sub" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/read" + input: "mul_858" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_673" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma" + input: "sub_159" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_674" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + input: "add_536" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + 
dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_675" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + input: "add_537" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim 
{ + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: 
true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_859/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_859" + op: "Mul" + input: "Mul_859/x" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_860/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_860" + op: "Mul" + input: "Mul_860/x" + input: "clip_by_global_norm/clip_by_global_norm/_159" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_539" + op: "Add" + input: "Mul_859" + input: "Mul_860" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_861/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_861" + op: "Mul" + input: "Mul_861/x" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_159" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_159" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_862/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_862" + op: "Mul" + input: "Mul_862/x" + input: "Square_159" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_540" + 
op: "Add" + input: "Mul_861" + input: "Mul_862" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_159" + op: "Sqrt" + input: "add_540" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_541/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_541" + op: "Add" + input: "Sqrt_159" + input: "add_541/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_160" + op: "RealDiv" + input: "add_539" + input: "add_541" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_863/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_863" + op: "Mul" + input: "mul_863/x" + input: "bert/encoder/layer_9/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_542" + op: "Add" + input: "truediv_160" + input: "mul_863" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_864" + op: "Mul" + input: "add" + input: "add_542" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_160" + op: "Sub" + input: "bert/encoder/layer_9/intermediate/dense/kernel/read" + input: "mul_864" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_676" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/kernel" + input: "sub_160" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_677" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + input: "add_539" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_678" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + input: "add_540" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/bias/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + 
shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/bias/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/bias/adam_v/read" + op: "Identity" + input: 
"bert/encoder/layer_9/intermediate/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_865/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_865" + op: "Mul" + input: "Mul_865/x" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_866/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_866" + op: "Mul" + input: "Mul_866/x" + input: "clip_by_global_norm/clip_by_global_norm/_160" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_543" + op: "Add" + input: "Mul_865" + input: "Mul_866" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_867/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_867" + op: "Mul" + input: "Mul_867/x" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_160" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_160" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_868/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_868" + op: "Mul" + input: "Mul_868/x" + input: "Square_160" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_544" + op: "Add" + input: "Mul_867" + input: "Mul_868" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_160" + op: "Sqrt" + input: "add_544" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" 
+ value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_545/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_545" + op: "Add" + input: "Sqrt_160" + input: "add_545/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_161" + op: "RealDiv" + input: "add_543" + input: "add_545" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_869" + op: "Mul" + input: "add" + input: "truediv_161" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_161" + op: "Sub" + input: "bert/encoder/layer_9/intermediate/dense/bias/read" + input: "mul_869" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_679" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/bias" + input: "sub_161" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_680" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_m" + input: "add_543" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_681" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_v" + input: "add_544" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: 
"bert/encoder/layer_9/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_9/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_9/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/kernel/adam_m" + input: "bert/encoder/layer_9/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_9/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_9/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_9/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_9/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/kernel/adam_v" + input: "bert/encoder/layer_9/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_9/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_870/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_870" + op: "Mul" + input: "Mul_870/x" + input: "bert/encoder/layer_9/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_871/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 
0.10000000149011612 + } + } + } +} +node { + name: "Mul_871" + op: "Mul" + input: "Mul_871/x" + input: "clip_by_global_norm/clip_by_global_norm/_161" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_546" + op: "Add" + input: "Mul_870" + input: "Mul_871" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_872/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_872" + op: "Mul" + input: "Mul_872/x" + input: "bert/encoder/layer_9/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_161" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_161" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_873/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_873" + op: "Mul" + input: "Mul_873/x" + input: "Square_161" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_547" + op: "Add" + input: "Mul_872" + input: "Mul_873" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_161" + op: "Sqrt" + input: "add_547" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_548/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_548" + op: "Add" + input: "Sqrt_161" + input: "add_548/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_162" + op: "RealDiv" + input: "add_546" + input: "add_548" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_874/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + 
attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_874" + op: "Mul" + input: "mul_874/x" + input: "bert/encoder/layer_9/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_549" + op: "Add" + input: "truediv_162" + input: "mul_874" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_875" + op: "Mul" + input: "add" + input: "add_549" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_162" + op: "Sub" + input: "bert/encoder/layer_9/output/dense/kernel/read" + input: "mul_875" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_682" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/kernel" + input: "sub_162" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_683" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/kernel/adam_m" + input: "add_546" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_684" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/kernel/adam_v" + input: "add_547" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value 
{ + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/bias/adam_m" + input: "bert/encoder/layer_9/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_9/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/bias/adam_v" + input: "bert/encoder/layer_9/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_9/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} 
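The optimizer nodes in this dump repeat one fixed pattern per trainable variable: zero-initialized adam_m/adam_v slots, a first-moment blend using the 0.9/0.1 constants, a second-moment blend using 0.999/0.001 over the squared clipped gradient, normalization by Sqrt(v) + 1e-6, an additional 0.01 * weight term for kernel variables, scaling by the learning-rate tensor "add", and an Assign of the subtracted result back into the variable. A minimal NumPy sketch of that per-variable step, assuming a hypothetical helper (the name and signature below are illustrative, not part of the repository):

    import numpy as np

    def adam_weight_decay_step(param, grad, m, v, lr,
                               beta_1=0.9, beta_2=0.999,
                               epsilon=1e-6, weight_decay=0.01,
                               apply_decay=True):
        """One update mirroring the Mul/Add/Sqrt/RealDiv/Sub/Assign nodes above.

        m, v are the adam_m / adam_v slots (zero-initialized in the graph),
        grad is the already-clipped gradient, lr is the scalar "add" tensor,
        and apply_decay is False for bias and LayerNorm parameters.
        """
        m = beta_1 * m + (1.0 - beta_1) * grad          # e.g. Mul_859 + Mul_860 -> add_539
        v = beta_2 * v + (1.0 - beta_2) * grad ** 2     # e.g. Mul_861 + Mul_862 -> add_540
        update = m / (np.sqrt(v) + epsilon)             # Sqrt_159 / add_541 / truediv_160
        if apply_decay:
            update = update + weight_decay * param      # mul_863 -> add_542
        param = param - lr * update                     # mul_864 -> sub_160 -> Assign_676
        return param, m, v

In the surrounding nodes the 0.01 decay term appears for the intermediate/dense and output/dense kernels but not for the bias or LayerNorm updates, consistent with an AdamWeightDecay-style optimizer that excludes those parameters from weight decay.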
+node { + name: "Mul_876/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_876" + op: "Mul" + input: "Mul_876/x" + input: "bert/encoder/layer_9/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_877/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_877" + op: "Mul" + input: "Mul_877/x" + input: "clip_by_global_norm/clip_by_global_norm/_162" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_550" + op: "Add" + input: "Mul_876" + input: "Mul_877" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_878/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_878" + op: "Mul" + input: "Mul_878/x" + input: "bert/encoder/layer_9/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_162" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_162" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_879/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_879" + op: "Mul" + input: "Mul_879/x" + input: "Square_162" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_551" + op: "Add" + input: "Mul_878" + input: "Mul_879" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_162" + op: "Sqrt" + input: "add_551" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_552/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 
9.999999974752427e-07 + } + } + } +} +node { + name: "add_552" + op: "Add" + input: "Sqrt_162" + input: "add_552/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_163" + op: "RealDiv" + input: "add_550" + input: "add_552" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_880" + op: "Mul" + input: "add" + input: "truediv_163" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_163" + op: "Sub" + input: "bert/encoder/layer_9/output/dense/bias/read" + input: "mul_880" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_685" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/bias" + input: "sub_163" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_686" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/bias/adam_m" + input: "add_550" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_687" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/bias/adam_v" + input: "add_551" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 
768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_881/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 
0.8999999761581421 + } + } + } +} +node { + name: "Mul_881" + op: "Mul" + input: "Mul_881/x" + input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_882/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_882" + op: "Mul" + input: "Mul_882/x" + input: "clip_by_global_norm/clip_by_global_norm/_163" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_553" + op: "Add" + input: "Mul_881" + input: "Mul_882" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_883/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_883" + op: "Mul" + input: "Mul_883/x" + input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_163" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_163" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_884/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_884" + op: "Mul" + input: "Mul_884/x" + input: "Square_163" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_554" + op: "Add" + input: "Mul_883" + input: "Mul_884" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_163" + op: "Sqrt" + input: "add_554" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_555/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_555" + op: "Add" + input: "Sqrt_163" + input: "add_555/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } 
+} +node { + name: "truediv_164" + op: "RealDiv" + input: "add_553" + input: "add_555" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_885" + op: "Mul" + input: "add" + input: "truediv_164" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_164" + op: "Sub" + input: "bert/encoder/layer_9/output/LayerNorm/beta/read" + input: "mul_885" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_688" + op: "Assign" + input: "bert/encoder/layer_9/output/LayerNorm/beta" + input: "sub_164" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_689" + op: "Assign" + input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + input: "add_553" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_690" + op: "Assign" + input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + input: "add_554" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + input: 
"bert/encoder/layer_9/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_886/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_886" + op: "Mul" + input: "Mul_886/x" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_887/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_887" + op: "Mul" + input: "Mul_887/x" + input: "clip_by_global_norm/clip_by_global_norm/_164" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_556" + op: "Add" + input: "Mul_886" + input: "Mul_887" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_888/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_888" + op: "Mul" + input: "Mul_888/x" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_164" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_164" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_889/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_889" + op: "Mul" + input: "Mul_889/x" + input: "Square_164" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_557" + op: "Add" + input: "Mul_888" + input: "Mul_889" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_164" + op: "Sqrt" + input: "add_557" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_558/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_558" + op: "Add" + input: "Sqrt_164" + input: "add_558/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_165" + op: "RealDiv" + input: "add_556" + input: "add_558" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + 
} + } + } + } +} +node { + name: "mul_890" + op: "Mul" + input: "add" + input: "truediv_165" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_165" + op: "Sub" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/read" + input: "mul_890" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_691" + op: "Assign" + input: "bert/encoder/layer_9/output/LayerNorm/gamma" + input: "sub_165" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_692" + op: "Assign" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + input: "add_556" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_693" + op: "Assign" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + input: "add_557" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + 
list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_891/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_891" + op: "Mul" + input: "Mul_891/x" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_892/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_892" + op: "Mul" + input: "Mul_892/x" + input: "clip_by_global_norm/clip_by_global_norm/_165" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_559" + op: "Add" + input: "Mul_891" + input: "Mul_892" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: 
"Mul_893/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_893" + op: "Mul" + input: "Mul_893/x" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_165" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_165" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_894/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_894" + op: "Mul" + input: "Mul_894/x" + input: "Square_165" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_560" + op: "Add" + input: "Mul_893" + input: "Mul_894" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_165" + op: "Sqrt" + input: "add_560" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_561/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_561" + op: "Add" + input: "Sqrt_165" + input: "add_561/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_166" + op: "RealDiv" + input: "add_559" + input: "add_561" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_895/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_895" + op: "Mul" + input: "mul_895/x" + input: "bert/encoder/layer_10/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_562" + op: "Add" + input: "truediv_166" + input: "mul_895" + attr { + key: "T" + value { + 
type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_896" + op: "Mul" + input: "add" + input: "add_562" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_166" + op: "Sub" + input: "bert/encoder/layer_10/attention/self/query/kernel/read" + input: "mul_896" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_694" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/kernel" + input: "sub_166" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_695" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_m" + input: "add_559" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_696" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_v" + input: "add_560" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: 
"bert/encoder/layer_10/attention/self/query/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_10/attention/self/query/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/self/query/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_10/attention/self/query/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/self/query/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_897/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 
0.8999999761581421 + } + } + } +} +node { + name: "Mul_897" + op: "Mul" + input: "Mul_897/x" + input: "bert/encoder/layer_10/attention/self/query/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_898/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_898" + op: "Mul" + input: "Mul_898/x" + input: "clip_by_global_norm/clip_by_global_norm/_166" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_563" + op: "Add" + input: "Mul_897" + input: "Mul_898" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_899/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_899" + op: "Mul" + input: "Mul_899/x" + input: "bert/encoder/layer_10/attention/self/query/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_166" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_166" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_900/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_900" + op: "Mul" + input: "Mul_900/x" + input: "Square_166" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_564" + op: "Add" + input: "Mul_899" + input: "Mul_900" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_166" + op: "Sqrt" + input: "add_564" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_565/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_565" + op: "Add" + input: "Sqrt_166" + input: "add_565/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + 
} + } + } +} +node { + name: "truediv_167" + op: "RealDiv" + input: "add_563" + input: "add_565" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_901" + op: "Mul" + input: "add" + input: "truediv_167" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_167" + op: "Sub" + input: "bert/encoder/layer_10/attention/self/query/bias/read" + input: "mul_901" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_697" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/bias" + input: "sub_167" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_698" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/bias/adam_m" + input: "add_563" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_699" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/bias/adam_v" + input: "add_564" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: 
"bert/encoder/layer_10/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: 
"bert/encoder/layer_10/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_902/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_902" + op: "Mul" + input: "Mul_902/x" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_903/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_903" + op: "Mul" + input: "Mul_903/x" + input: "clip_by_global_norm/clip_by_global_norm/_167" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + 
name: "add_566" + op: "Add" + input: "Mul_902" + input: "Mul_903" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_904/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_904" + op: "Mul" + input: "Mul_904/x" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_167" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_167" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_905/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_905" + op: "Mul" + input: "Mul_905/x" + input: "Square_167" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_567" + op: "Add" + input: "Mul_904" + input: "Mul_905" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_167" + op: "Sqrt" + input: "add_567" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_568/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_568" + op: "Add" + input: "Sqrt_167" + input: "add_568/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_168" + op: "RealDiv" + input: "add_566" + input: "add_568" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_906/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_906" + op: "Mul" + input: "mul_906/x" + input: "bert/encoder/layer_10/attention/self/key/kernel/read" + attr { + key: "T" + value { + 
type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_569" + op: "Add" + input: "truediv_168" + input: "mul_906" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_907" + op: "Mul" + input: "add" + input: "add_569" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_168" + op: "Sub" + input: "bert/encoder/layer_10/attention/self/key/kernel/read" + input: "mul_907" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_700" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/key/kernel" + input: "sub_168" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_701" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_m" + input: "add_566" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_702" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_v" + input: "add_567" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value 
{ + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_10/attention/self/key/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/self/key/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_10/attention/self/key/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/self/key/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_908/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" 
+ value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_908" + op: "Mul" + input: "Mul_908/x" + input: "bert/encoder/layer_10/attention/self/key/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_909/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_909" + op: "Mul" + input: "Mul_909/x" + input: "clip_by_global_norm/clip_by_global_norm/_168" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_570" + op: "Add" + input: "Mul_908" + input: "Mul_909" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_910/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_910" + op: "Mul" + input: "Mul_910/x" + input: "bert/encoder/layer_10/attention/self/key/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_168" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_168" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_911/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_911" + op: "Mul" + input: "Mul_911/x" + input: "Square_168" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_571" + op: "Add" + input: "Mul_910" + input: "Mul_911" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_168" + op: "Sqrt" + input: "add_571" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_572/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_572" + op: "Add" + input: "Sqrt_168" + input: "add_572/y" + attr { + key: "T" 
+ value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_169" + op: "RealDiv" + input: "add_570" + input: "add_572" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_912" + op: "Mul" + input: "add" + input: "truediv_169" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_169" + op: "Sub" + input: "bert/encoder/layer_10/attention/self/key/bias/read" + input: "mul_912" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_703" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/key/bias" + input: "sub_169" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_704" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/key/bias/adam_m" + input: "add_570" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_705" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/key/bias/adam_v" + input: "add_571" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + 
name: "bert/encoder/layer_10/attention/self/value/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: 
"bert/encoder/layer_10/attention/self/value/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_913/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_913" + op: "Mul" + input: "Mul_913/x" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_914/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_914" + op: "Mul" + input: "Mul_914/x" + input: "clip_by_global_norm/clip_by_global_norm/_169" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_573" + op: "Add" + input: "Mul_913" + input: "Mul_914" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_915/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_915" + op: "Mul" + input: "Mul_915/x" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_169" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_169" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_916/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_916" + op: "Mul" + input: "Mul_916/x" + input: "Square_169" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_574" + op: "Add" + input: "Mul_915" + input: "Mul_916" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_169" + op: "Sqrt" + input: "add_574" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_575/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_575" + op: "Add" + input: "Sqrt_169" + input: "add_575/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_170" + op: "RealDiv" + input: "add_573" + input: "add_575" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_917/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_917" + op: 
"Mul" + input: "mul_917/x" + input: "bert/encoder/layer_10/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_576" + op: "Add" + input: "truediv_170" + input: "mul_917" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_918" + op: "Mul" + input: "add" + input: "add_576" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_170" + op: "Sub" + input: "bert/encoder/layer_10/attention/self/value/kernel/read" + input: "mul_918" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_706" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/kernel" + input: "sub_170" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_707" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_m" + input: "add_573" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_708" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_v" + input: "add_574" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value 
{ + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_10/attention/self/value/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/self/value/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_10/attention/self/value/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/self/value/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + 
dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_919/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_919" + op: "Mul" + input: "Mul_919/x" + input: "bert/encoder/layer_10/attention/self/value/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_920/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_920" + op: "Mul" + input: "Mul_920/x" + input: "clip_by_global_norm/clip_by_global_norm/_170" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_577" + op: "Add" + input: "Mul_919" + input: "Mul_920" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_921/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_921" + op: "Mul" + input: "Mul_921/x" + input: "bert/encoder/layer_10/attention/self/value/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_170" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_170" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_922/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_922" + op: "Mul" + input: "Mul_922/x" + input: "Square_170" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_578" + op: "Add" + input: "Mul_921" + input: "Mul_922" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_170" + op: "Sqrt" + input: "add_578" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_579/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor 
{ + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_579" + op: "Add" + input: "Sqrt_170" + input: "add_579/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_171" + op: "RealDiv" + input: "add_577" + input: "add_579" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_923" + op: "Mul" + input: "add" + input: "truediv_171" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_171" + op: "Sub" + input: "bert/encoder/layer_10/attention/self/value/bias/read" + input: "mul_923" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_709" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/bias" + input: "sub_171" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_710" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/bias/adam_m" + input: "add_577" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_711" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/bias/adam_v" + input: "add_578" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + 
shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + 
shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_924/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_924" + op: "Mul" + input: "Mul_924/x" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_925/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + 
tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_925" + op: "Mul" + input: "Mul_925/x" + input: "clip_by_global_norm/clip_by_global_norm/_171" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_580" + op: "Add" + input: "Mul_924" + input: "Mul_925" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_926/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_926" + op: "Mul" + input: "Mul_926/x" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_171" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_171" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_927/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_927" + op: "Mul" + input: "Mul_927/x" + input: "Square_171" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_581" + op: "Add" + input: "Mul_926" + input: "Mul_927" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_171" + op: "Sqrt" + input: "add_581" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_582/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_582" + op: "Add" + input: "Sqrt_171" + input: "add_582/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_172" + op: "RealDiv" + input: "add_580" + input: "add_582" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_928/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list 
{ + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_928" + op: "Mul" + input: "mul_928/x" + input: "bert/encoder/layer_10/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_583" + op: "Add" + input: "truediv_172" + input: "mul_928" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_929" + op: "Mul" + input: "add" + input: "add_583" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_172" + op: "Sub" + input: "bert/encoder/layer_10/attention/output/dense/kernel/read" + input: "mul_929" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_712" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/kernel" + input: "sub_172" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_713" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + input: "add_580" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_714" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + input: "add_581" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 
0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_10/attention/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_10/attention/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/bias/adam_v/read" + op: "Identity" 
+ input: "bert/encoder/layer_10/attention/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_930/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_930" + op: "Mul" + input: "Mul_930/x" + input: "bert/encoder/layer_10/attention/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_931/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_931" + op: "Mul" + input: "Mul_931/x" + input: "clip_by_global_norm/clip_by_global_norm/_172" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_584" + op: "Add" + input: "Mul_930" + input: "Mul_931" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_932/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_932" + op: "Mul" + input: "Mul_932/x" + input: "bert/encoder/layer_10/attention/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_172" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_172" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_933/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_933" + op: "Mul" + input: "Mul_933/x" + input: "Square_172" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_585" + op: "Add" + input: "Mul_932" + input: "Mul_933" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_172" + op: "Sqrt" + input: "add_585" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + 
key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_586/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_586" + op: "Add" + input: "Sqrt_172" + input: "add_586/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_173" + op: "RealDiv" + input: "add_584" + input: "add_586" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_934" + op: "Mul" + input: "add" + input: "truediv_173" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_173" + op: "Sub" + input: "bert/encoder/layer_10/attention/output/dense/bias/read" + input: "mul_934" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_715" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/bias" + input: "sub_173" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_716" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/bias/adam_m" + input: "add_584" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_717" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/bias/adam_v" + input: "add_585" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + 
name: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: 
"bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_935/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_935" + op: "Mul" + input: "Mul_935/x" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_936/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_936" + op: "Mul" + input: "Mul_936/x" + input: "clip_by_global_norm/clip_by_global_norm/_173" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_587" + op: "Add" + input: "Mul_935" + input: "Mul_936" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_937/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_937" + op: "Mul" + input: "Mul_937/x" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_173" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_173" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_938/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_938" + op: "Mul" + input: "Mul_938/x" + input: "Square_173" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_588" + op: "Add" + input: "Mul_937" + input: "Mul_938" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: 
"Sqrt_173" + op: "Sqrt" + input: "add_588" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_589/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_589" + op: "Add" + input: "Sqrt_173" + input: "add_589/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_174" + op: "RealDiv" + input: "add_587" + input: "add_589" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_939" + op: "Mul" + input: "add" + input: "truediv_174" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_174" + op: "Sub" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/read" + input: "mul_939" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_718" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta" + input: "sub_174" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_719" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + input: "add_587" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_720" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + input: "add_588" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + 
key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: 
"use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_940/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_940" + op: "Mul" + input: "Mul_940/x" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_941/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_941" + op: "Mul" + input: "Mul_941/x" + input: "clip_by_global_norm/clip_by_global_norm/_174" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_590" + op: "Add" + input: "Mul_940" + input: "Mul_941" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_942/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_942" + op: "Mul" + input: "Mul_942/x" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_174" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_174" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_943/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_943" + op: "Mul" + input: "Mul_943/x" + input: "Square_174" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_591" + op: "Add" + input: "Mul_942" + input: "Mul_943" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + 
attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_174" + op: "Sqrt" + input: "add_591" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_592/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_592" + op: "Add" + input: "Sqrt_174" + input: "add_592/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_175" + op: "RealDiv" + input: "add_590" + input: "add_592" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_944" + op: "Mul" + input: "add" + input: "truediv_175" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_175" + op: "Sub" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/read" + input: "mul_944" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_721" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma" + input: "sub_175" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_722" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + input: "add_590" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_723" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + input: "add_591" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + 
list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { 
+ key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_945/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_945" + op: "Mul" + input: "Mul_945/x" + input: 
"bert/encoder/layer_10/intermediate/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_946/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_946" + op: "Mul" + input: "Mul_946/x" + input: "clip_by_global_norm/clip_by_global_norm/_175" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_593" + op: "Add" + input: "Mul_945" + input: "Mul_946" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_947/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_947" + op: "Mul" + input: "Mul_947/x" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_175" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_175" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_948/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_948" + op: "Mul" + input: "Mul_948/x" + input: "Square_175" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_594" + op: "Add" + input: "Mul_947" + input: "Mul_948" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_175" + op: "Sqrt" + input: "add_594" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_595/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_595" + op: "Add" + input: "Sqrt_175" + input: "add_595/y" + attr { + key: "T" + value { + type: DT_FLOAT + 
} + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_176" + op: "RealDiv" + input: "add_593" + input: "add_595" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_949/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_949" + op: "Mul" + input: "mul_949/x" + input: "bert/encoder/layer_10/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_596" + op: "Add" + input: "truediv_176" + input: "mul_949" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_950" + op: "Mul" + input: "add" + input: "add_596" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_176" + op: "Sub" + input: "bert/encoder/layer_10/intermediate/dense/kernel/read" + input: "mul_950" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_724" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/kernel" + input: "sub_176" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_725" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + input: "add_593" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_726" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + input: "add_594" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: 
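+# Editor's note: the same slot-creation and update pattern repeats below for the
+# 3072-wide intermediate/dense/bias; as with the other bias and LayerNorm
+# variables in this dump, no 0.01*w weight-decay term appears in its update
+# (compare add_596 for the kernel above).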
"bert/encoder/layer_10/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/bias/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/bias/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_951/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_951" + op: "Mul" + input: "Mul_951/x" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_952/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_952" + op: "Mul" + input: "Mul_952/x" + input: "clip_by_global_norm/clip_by_global_norm/_176" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_597" + op: "Add" + input: "Mul_951" + input: "Mul_952" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_953/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_953" + op: "Mul" + input: "Mul_953/x" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_176" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_176" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_954/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_954" + op: "Mul" + input: "Mul_954/x" + input: "Square_176" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_598" + op: "Add" + input: "Mul_953" + input: "Mul_954" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_176" + op: "Sqrt" + input: "add_598" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_599/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_599" + op: "Add" + input: "Sqrt_176" + input: "add_599/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_177" + op: "RealDiv" + input: "add_597" + input: "add_599" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + 
size: 3072 + } + } + } + } + } +} +node { + name: "mul_955" + op: "Mul" + input: "add" + input: "truediv_177" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_177" + op: "Sub" + input: "bert/encoder/layer_10/intermediate/dense/bias/read" + input: "mul_955" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_727" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/bias" + input: "sub_177" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_728" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_m" + input: "add_597" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_729" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_v" + input: "add_598" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_10/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_10/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_10/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/kernel/adam_m" + input: "bert/encoder/layer_10/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_10/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_10/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_10/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + 
size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/kernel/adam_v" + input: "bert/encoder/layer_10/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_10/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_956/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_956" + op: "Mul" + input: "Mul_956/x" + input: "bert/encoder/layer_10/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_957/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_957" + op: "Mul" + input: "Mul_957/x" + input: "clip_by_global_norm/clip_by_global_norm/_177" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_600" + op: "Add" + input: "Mul_956" + input: "Mul_957" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_958/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + 
} + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_958" + op: "Mul" + input: "Mul_958/x" + input: "bert/encoder/layer_10/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_177" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_177" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_959/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_959" + op: "Mul" + input: "Mul_959/x" + input: "Square_177" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_601" + op: "Add" + input: "Mul_958" + input: "Mul_959" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_177" + op: "Sqrt" + input: "add_601" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_602/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_602" + op: "Add" + input: "Sqrt_177" + input: "add_602/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_178" + op: "RealDiv" + input: "add_600" + input: "add_602" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_960/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_960" + op: "Mul" + input: "mul_960/x" + input: "bert/encoder/layer_10/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_603" + op: "Add" + input: "truediv_178" + input: "mul_960" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_961" + op: "Mul" + input: "add" + input: "add_603" + attr { + key: "T" + value { 
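+# Editor's note: mul_960/add_603 above add the 0.01 weight-decay term for the
+# [3072, 768] output projection kernel, and mul_961/sub_178/Assign_730 below
+# apply the learning-rate-scaled update, mirroring the intermediate kernel
+# update earlier in this section.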
+ type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_178" + op: "Sub" + input: "bert/encoder/layer_10/output/dense/kernel/read" + input: "mul_961" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_730" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/kernel" + input: "sub_178" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_731" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/kernel/adam_m" + input: "add_600" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_732" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/kernel/adam_v" + input: "add_601" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/bias/adam_m" + input: "bert/encoder/layer_10/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_m" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_10/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/bias/adam_v" + input: "bert/encoder/layer_10/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_10/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_962/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_962" + op: "Mul" + input: "Mul_962/x" + input: "bert/encoder/layer_10/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_963/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: 
DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_963" + op: "Mul" + input: "Mul_963/x" + input: "clip_by_global_norm/clip_by_global_norm/_178" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_604" + op: "Add" + input: "Mul_962" + input: "Mul_963" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_964/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_964" + op: "Mul" + input: "Mul_964/x" + input: "bert/encoder/layer_10/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_178" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_178" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_965/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_965" + op: "Mul" + input: "Mul_965/x" + input: "Square_178" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_605" + op: "Add" + input: "Mul_964" + input: "Mul_965" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_178" + op: "Sqrt" + input: "add_605" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_606/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_606" + op: "Add" + input: "Sqrt_178" + input: "add_606/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_179" + op: "RealDiv" + input: "add_604" + input: "add_606" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_966" + op: "Mul" + input: "add" + input: "truediv_179" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_179" + op: "Sub" + input: 
"bert/encoder/layer_10/output/dense/bias/read" + input: "mul_966" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_733" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/bias" + input: "sub_179" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_734" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/bias/adam_m" + input: "add_604" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_735" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/bias/adam_v" + input: "add_605" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m/read" + op: 
"Identity" + input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_967/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_967" + op: "Mul" + input: "Mul_967/x" + input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_968/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_968" + op: "Mul" + input: "Mul_968/x" + input: "clip_by_global_norm/clip_by_global_norm/_179" + attr { + key: "T" + value { + 
type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_607" + op: "Add" + input: "Mul_967" + input: "Mul_968" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_969/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_969" + op: "Mul" + input: "Mul_969/x" + input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_179" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_179" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_970/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_970" + op: "Mul" + input: "Mul_970/x" + input: "Square_179" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_608" + op: "Add" + input: "Mul_969" + input: "Mul_970" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_179" + op: "Sqrt" + input: "add_608" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_609/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_609" + op: "Add" + input: "Sqrt_179" + input: "add_609/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_180" + op: "RealDiv" + input: "add_607" + input: "add_609" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_971" + op: "Mul" + input: "add" + input: "truediv_180" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_180" + op: "Sub" + input: "bert/encoder/layer_10/output/LayerNorm/beta/read" + input: "mul_971" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: 
"Assign_736" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/beta" + input: "sub_180" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_737" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + input: "add_607" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_738" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + input: "add_608" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_972/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_972" + op: "Mul" + input: "Mul_972/x" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_973/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_973" + op: "Mul" + input: "Mul_973/x" + input: "clip_by_global_norm/clip_by_global_norm/_180" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_610" + op: "Add" + 
input: "Mul_972" + input: "Mul_973" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_974/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_974" + op: "Mul" + input: "Mul_974/x" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_180" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_180" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_975/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_975" + op: "Mul" + input: "Mul_975/x" + input: "Square_180" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_611" + op: "Add" + input: "Mul_974" + input: "Mul_975" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_180" + op: "Sqrt" + input: "add_611" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_612/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_612" + op: "Add" + input: "Sqrt_180" + input: "add_612/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_181" + op: "RealDiv" + input: "add_610" + input: "add_612" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_976" + op: "Mul" + input: "add" + input: "truediv_181" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_181" + op: "Sub" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/read" + input: "mul_976" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_739" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/gamma" + input: "sub_181" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { 
+ key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_740" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + input: "add_610" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_741" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + input: "add_611" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + 
value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + 
attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_977/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_977" + op: "Mul" + input: "Mul_977/x" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_978/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_978" + op: "Mul" + input: "Mul_978/x" + input: "clip_by_global_norm/clip_by_global_norm/_181" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_613" + op: "Add" + input: "Mul_977" + input: "Mul_978" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_979/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_979" + op: "Mul" + input: "Mul_979/x" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_181" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_181" + attr { + key: 
"T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_980/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_980" + op: "Mul" + input: "Mul_980/x" + input: "Square_181" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_614" + op: "Add" + input: "Mul_979" + input: "Mul_980" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_181" + op: "Sqrt" + input: "add_614" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_615/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_615" + op: "Add" + input: "Sqrt_181" + input: "add_615/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_182" + op: "RealDiv" + input: "add_613" + input: "add_615" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_981/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_981" + op: "Mul" + input: "mul_981/x" + input: "bert/encoder/layer_11/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_616" + op: "Add" + input: "truediv_182" + input: "mul_981" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_982" + op: "Mul" + input: "add" + input: "add_616" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_182" + op: "Sub" + input: "bert/encoder/layer_11/attention/self/query/kernel/read" + input: "mul_982" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + 
name: "Assign_742" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/query/kernel" + input: "sub_182" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_743" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_m" + input: "add_613" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_744" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_v" + input: "add_614" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_11/attention/self/query/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/bias/adam_m/read" + op: "Identity" + input: 
"bert/encoder/layer_11/attention/self/query/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_11/attention/self/query/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/self/query/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_983/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_983" + op: "Mul" + input: "Mul_983/x" + input: "bert/encoder/layer_11/attention/self/query/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_984/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_984" + op: "Mul" + input: "Mul_984/x" + input: "clip_by_global_norm/clip_by_global_norm/_182" 
+ attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_617" + op: "Add" + input: "Mul_983" + input: "Mul_984" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_985/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_985" + op: "Mul" + input: "Mul_985/x" + input: "bert/encoder/layer_11/attention/self/query/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_182" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_182" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_986/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_986" + op: "Mul" + input: "Mul_986/x" + input: "Square_182" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_618" + op: "Add" + input: "Mul_985" + input: "Mul_986" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_182" + op: "Sqrt" + input: "add_618" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_619/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_619" + op: "Add" + input: "Sqrt_182" + input: "add_619/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_183" + op: "RealDiv" + input: "add_617" + input: "add_619" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_987" + op: "Mul" + input: "add" + input: "truediv_183" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_183" + op: "Sub" + input: "bert/encoder/layer_11/attention/self/query/bias/read" + input: "mul_987" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 
+ } + } + } + } + } +} +node { + name: "Assign_745" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/query/bias" + input: "sub_183" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_746" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/query/bias/adam_m" + input: "add_617" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_747" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/query/bias/adam_v" + input: "add_618" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_m" + } + } + } + 
attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + 
size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_988/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_988" + op: "Mul" + input: "Mul_988/x" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_989/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_989" + op: "Mul" + input: "Mul_989/x" + input: "clip_by_global_norm/clip_by_global_norm/_183" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_620" + op: "Add" + input: "Mul_988" + input: "Mul_989" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_990/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_990" + op: "Mul" + input: "Mul_990/x" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + 
size: 768 + } + } + } + } + } +} +node { + name: "Square_183" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_183" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_991/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_991" + op: "Mul" + input: "Mul_991/x" + input: "Square_183" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_621" + op: "Add" + input: "Mul_990" + input: "Mul_991" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_183" + op: "Sqrt" + input: "add_621" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_622/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_622" + op: "Add" + input: "Sqrt_183" + input: "add_622/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_184" + op: "RealDiv" + input: "add_620" + input: "add_622" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_992/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_992" + op: "Mul" + input: "mul_992/x" + input: "bert/encoder/layer_11/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_623" + op: "Add" + input: "truediv_184" + input: "mul_992" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_993" + op: "Mul" + input: "add" + input: "add_623" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_184" + op: "Sub" + input: "bert/encoder/layer_11/attention/self/key/kernel/read" + input: "mul_993" + attr { + key: "T" + value { + type: DT_FLOAT + } + 
} + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_748" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/kernel" + input: "sub_184" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_749" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_m" + input: "add_620" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_750" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_v" + input: "add_621" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_11/attention/self/key/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: 
"bert/encoder/layer_11/attention/self/key/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/self/key/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_11/attention/self/key/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/self/key/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_994/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_994" + op: "Mul" + input: "Mul_994/x" + input: "bert/encoder/layer_11/attention/self/key/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_995/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_995" + op: "Mul" + input: 
"Mul_995/x" + input: "clip_by_global_norm/clip_by_global_norm/_184" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_624" + op: "Add" + input: "Mul_994" + input: "Mul_995" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_996/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_996" + op: "Mul" + input: "Mul_996/x" + input: "bert/encoder/layer_11/attention/self/key/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_184" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_184" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_997/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_997" + op: "Mul" + input: "Mul_997/x" + input: "Square_184" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_625" + op: "Add" + input: "Mul_996" + input: "Mul_997" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_184" + op: "Sqrt" + input: "add_625" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_626/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_626" + op: "Add" + input: "Sqrt_184" + input: "add_626/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_185" + op: "RealDiv" + input: "add_624" + input: "add_626" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_998" + op: "Mul" + input: "add" + input: "truediv_185" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_185" + op: "Sub" + input: "bert/encoder/layer_11/attention/self/key/bias/read" + input: "mul_998" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_751" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/bias" + input: "sub_185" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_752" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/bias/adam_m" + input: "add_624" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_753" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/bias/adam_v" + input: "add_625" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_999/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_999" + op: "Mul" + input: "Mul_999/x" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1000/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_1000" + op: "Mul" + input: "Mul_1000/x" + input: "clip_by_global_norm/clip_by_global_norm/_185" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_627" + op: "Add" + input: "Mul_999" + input: "Mul_1000" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1001/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_1001" + op: "Mul" + input: "Mul_1001/x" + input: 
"bert/encoder/layer_11/attention/self/value/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_185" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_185" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1002/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_1002" + op: "Mul" + input: "Mul_1002/x" + input: "Square_185" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_628" + op: "Add" + input: "Mul_1001" + input: "Mul_1002" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_185" + op: "Sqrt" + input: "add_628" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_629/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_629" + op: "Add" + input: "Sqrt_185" + input: "add_629/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_186" + op: "RealDiv" + input: "add_627" + input: "add_629" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1003/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_1003" + op: "Mul" + input: "mul_1003/x" + input: "bert/encoder/layer_11/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_630" + op: "Add" + input: "truediv_186" + input: "mul_1003" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1004" + op: "Mul" + input: "add" + input: "add_630" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + 
size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_186" + op: "Sub" + input: "bert/encoder/layer_11/attention/self/value/kernel/read" + input: "mul_1004" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_754" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/kernel" + input: "sub_186" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_755" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_m" + input: "add_627" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_756" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_v" + input: "add_628" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_11/attention/self/value/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_m" + } + } + } + 
attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/self/value/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_11/attention/self/value/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/self/value/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1005/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_1005" + op: "Mul" + input: "Mul_1005/x" + input: "bert/encoder/layer_11/attention/self/value/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1006/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + 
shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_1006" + op: "Mul" + input: "Mul_1006/x" + input: "clip_by_global_norm/clip_by_global_norm/_186" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_631" + op: "Add" + input: "Mul_1005" + input: "Mul_1006" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1007/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_1007" + op: "Mul" + input: "Mul_1007/x" + input: "bert/encoder/layer_11/attention/self/value/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_186" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_186" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1008/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_1008" + op: "Mul" + input: "Mul_1008/x" + input: "Square_186" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_632" + op: "Add" + input: "Mul_1007" + input: "Mul_1008" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_186" + op: "Sqrt" + input: "add_632" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_633/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_633" + op: "Add" + input: "Sqrt_186" + input: "add_633/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_187" + op: "RealDiv" + input: "add_631" + input: "add_633" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1009" + op: "Mul" + input: "add" + input: "truediv_187" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_187" + op: "Sub" + input: "bert/encoder/layer_11/attention/self/value/bias/read" + input: "mul_1009" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_757" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/bias" + input: "sub_187" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_758" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/bias/adam_m" + input: "add_631" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_759" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/bias/adam_v" + input: "add_632" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" 
+ value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + 
value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1010/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_1010" + op: "Mul" + input: "Mul_1010/x" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1011/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_1011" + op: "Mul" + input: "Mul_1011/x" + input: "clip_by_global_norm/clip_by_global_norm/_187" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_634" + op: "Add" + input: "Mul_1010" + input: "Mul_1011" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1012/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + 
shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_1012" + op: "Mul" + input: "Mul_1012/x" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_187" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_187" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1013/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_1013" + op: "Mul" + input: "Mul_1013/x" + input: "Square_187" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_635" + op: "Add" + input: "Mul_1012" + input: "Mul_1013" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_187" + op: "Sqrt" + input: "add_635" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_636/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_636" + op: "Add" + input: "Sqrt_187" + input: "add_636/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_188" + op: "RealDiv" + input: "add_634" + input: "add_636" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1014/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_1014" + op: "Mul" + input: "mul_1014/x" + input: "bert/encoder/layer_11/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_637" + op: "Add" + input: "truediv_188" + input: "mul_1014" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + 
list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1015" + op: "Mul" + input: "add" + input: "add_637" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_188" + op: "Sub" + input: "bert/encoder/layer_11/attention/output/dense/kernel/read" + input: "mul_1015" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_760" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/dense/kernel" + input: "sub_188" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_761" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + input: "add_634" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_762" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + input: "add_635" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/bias/adam_m/Assign" + op: "Assign" + input: 
"bert/encoder/layer_11/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_11/attention/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_11/attention/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1016/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_1016" + op: 
"Mul" + input: "Mul_1016/x" + input: "bert/encoder/layer_11/attention/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1017/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_1017" + op: "Mul" + input: "Mul_1017/x" + input: "clip_by_global_norm/clip_by_global_norm/_188" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_638" + op: "Add" + input: "Mul_1016" + input: "Mul_1017" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1018/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_1018" + op: "Mul" + input: "Mul_1018/x" + input: "bert/encoder/layer_11/attention/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_188" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_188" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1019/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_1019" + op: "Mul" + input: "Mul_1019/x" + input: "Square_188" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_639" + op: "Add" + input: "Mul_1018" + input: "Mul_1019" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_188" + op: "Sqrt" + input: "add_639" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_640/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_640" + op: "Add" + input: "Sqrt_188" + input: "add_640/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_189" + op: 
"RealDiv" + input: "add_638" + input: "add_640" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1020" + op: "Mul" + input: "add" + input: "truediv_189" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_189" + op: "Sub" + input: "bert/encoder/layer_11/attention/output/dense/bias/read" + input: "mul_1020" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_763" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/dense/bias" + input: "sub_189" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_764" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/dense/bias/adam_m" + input: "add_638" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_765" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/dense/bias/adam_v" + input: "add_639" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: 
"bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1021/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 
0.8999999761581421 + } + } + } +} +node { + name: "Mul_1021" + op: "Mul" + input: "Mul_1021/x" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1022/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_1022" + op: "Mul" + input: "Mul_1022/x" + input: "clip_by_global_norm/clip_by_global_norm/_189" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_641" + op: "Add" + input: "Mul_1021" + input: "Mul_1022" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1023/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_1023" + op: "Mul" + input: "Mul_1023/x" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_189" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_189" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1024/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_1024" + op: "Mul" + input: "Mul_1024/x" + input: "Square_189" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_642" + op: "Add" + input: "Mul_1023" + input: "Mul_1024" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_189" + op: "Sqrt" + input: "add_642" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_643/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_643" + op: "Add" + input: "Sqrt_189" + input: "add_643/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + 
dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_190" + op: "RealDiv" + input: "add_641" + input: "add_643" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1025" + op: "Mul" + input: "add" + input: "truediv_190" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_190" + op: "Sub" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/read" + input: "mul_1025" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_766" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta" + input: "sub_190" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_767" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + input: "add_641" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_768" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + input: "add_642" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} 
+node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1026/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + 
value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_1026" + op: "Mul" + input: "Mul_1026/x" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1027/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_1027" + op: "Mul" + input: "Mul_1027/x" + input: "clip_by_global_norm/clip_by_global_norm/_190" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_644" + op: "Add" + input: "Mul_1026" + input: "Mul_1027" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1028/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_1028" + op: "Mul" + input: "Mul_1028/x" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_190" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_190" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1029/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_1029" + op: "Mul" + input: "Mul_1029/x" + input: "Square_190" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_645" + op: "Add" + input: "Mul_1028" + input: "Mul_1029" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_190" + op: "Sqrt" + input: "add_645" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_646/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_646" + op: "Add" + input: "Sqrt_190" + input: 
"add_646/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_191" + op: "RealDiv" + input: "add_644" + input: "add_646" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1030" + op: "Mul" + input: "add" + input: "truediv_191" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_191" + op: "Sub" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/read" + input: "mul_1030" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_769" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma" + input: "sub_191" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_770" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + input: "add_644" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_771" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + input: "add_645" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + 
tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 
+ } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_1031/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_1031" + op: "Mul" + input: "Mul_1031/x" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_1032/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_1032" + op: "Mul" + input: "Mul_1032/x" + input: "clip_by_global_norm/clip_by_global_norm/_191" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + 
attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_647" + op: "Add" + input: "Mul_1031" + input: "Mul_1032" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_1033/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_1033" + op: "Mul" + input: "Mul_1033/x" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_191" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_191" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_1034/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_1034" + op: "Mul" + input: "Mul_1034/x" + input: "Square_191" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_648" + op: "Add" + input: "Mul_1033" + input: "Mul_1034" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_191" + op: "Sqrt" + input: "add_648" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_649/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_649" + op: "Add" + input: "Sqrt_191" + input: "add_649/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_192" + op: "RealDiv" + input: "add_647" + input: "add_649" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_1035/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} 
+node { + name: "mul_1035" + op: "Mul" + input: "mul_1035/x" + input: "bert/encoder/layer_11/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_650" + op: "Add" + input: "truediv_192" + input: "mul_1035" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_1036" + op: "Mul" + input: "add" + input: "add_650" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_192" + op: "Sub" + input: "bert/encoder/layer_11/intermediate/dense/kernel/read" + input: "mul_1036" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_772" + op: "Assign" + input: "bert/encoder/layer_11/intermediate/dense/kernel" + input: "sub_192" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_773" + op: "Assign" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + input: "add_647" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_774" + op: "Assign" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + input: "add_648" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_m" + 
} + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/bias/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } 
+ } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/bias/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_1037/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_1037" + op: "Mul" + input: "Mul_1037/x" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_1038/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_1038" + op: "Mul" + input: "Mul_1038/x" + input: "clip_by_global_norm/clip_by_global_norm/_192" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_651" + op: "Add" + input: "Mul_1037" + input: 
"Mul_1038" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_1039/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_1039" + op: "Mul" + input: "Mul_1039/x" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_192" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_192" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_1040/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_1040" + op: "Mul" + input: "Mul_1040/x" + input: "Square_192" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_652" + op: "Add" + input: "Mul_1039" + input: "Mul_1040" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_192" + op: "Sqrt" + input: "add_652" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_653/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_653" + op: "Add" + input: "Sqrt_192" + input: "add_653/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_193" + op: "RealDiv" + input: "add_651" + input: "add_653" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_1041" + op: "Mul" + input: "add" + input: "truediv_193" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_193" + op: "Sub" + input: "bert/encoder/layer_11/intermediate/dense/bias/read" + input: "mul_1041" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_775" + op: "Assign" + input: "bert/encoder/layer_11/intermediate/dense/bias" + input: "sub_193" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + 
key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_776" + op: "Assign" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_m" + input: "add_651" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_777" + op: "Assign" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_v" + input: "add_652" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_11/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_11/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape 
{ + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/kernel/adam_m" + input: "bert/encoder/layer_11/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_11/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_11/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_11/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: 
"bert/encoder/layer_11/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/kernel/adam_v" + input: "bert/encoder/layer_11/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_11/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1042/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_1042" + op: "Mul" + input: "Mul_1042/x" + input: "bert/encoder/layer_11/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1043/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_1043" + op: "Mul" + input: "Mul_1043/x" + input: "clip_by_global_norm/clip_by_global_norm/_193" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_654" + op: "Add" + input: "Mul_1042" + input: "Mul_1043" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1044/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_1044" + op: "Mul" + input: "Mul_1044/x" + input: "bert/encoder/layer_11/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_193" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_193" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1045/x" + op: "Const" + attr { + key: 
"_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_1045" + op: "Mul" + input: "Mul_1045/x" + input: "Square_193" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_655" + op: "Add" + input: "Mul_1044" + input: "Mul_1045" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_193" + op: "Sqrt" + input: "add_655" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_656/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_656" + op: "Add" + input: "Sqrt_193" + input: "add_656/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_194" + op: "RealDiv" + input: "add_654" + input: "add_656" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1046/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_1046" + op: "Mul" + input: "mul_1046/x" + input: "bert/encoder/layer_11/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_657" + op: "Add" + input: "truediv_194" + input: "mul_1046" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1047" + op: "Mul" + input: "add" + input: "add_657" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_194" + op: "Sub" + input: "bert/encoder/layer_11/output/dense/kernel/read" + input: "mul_1047" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_778" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/kernel" + input: "sub_194" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_11/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_779" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/kernel/adam_m" + input: "add_654" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_780" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/kernel/adam_v" + input: "add_655" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/bias/adam_m" + input: "bert/encoder/layer_11/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_11/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: 
"bert/encoder/layer_11/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/bias/adam_v" + input: "bert/encoder/layer_11/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_11/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1048/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_1048" + op: "Mul" + input: "Mul_1048/x" + input: "bert/encoder/layer_11/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1049/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_1049" + op: "Mul" + input: "Mul_1049/x" + input: "clip_by_global_norm/clip_by_global_norm/_194" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_658" + op: "Add" + input: "Mul_1048" + input: "Mul_1049" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1050/x" + op: "Const" + 
attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_1050" + op: "Mul" + input: "Mul_1050/x" + input: "bert/encoder/layer_11/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_194" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_194" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1051/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_1051" + op: "Mul" + input: "Mul_1051/x" + input: "Square_194" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_659" + op: "Add" + input: "Mul_1050" + input: "Mul_1051" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_194" + op: "Sqrt" + input: "add_659" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_660/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_660" + op: "Add" + input: "Sqrt_194" + input: "add_660/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_195" + op: "RealDiv" + input: "add_658" + input: "add_660" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1052" + op: "Mul" + input: "add" + input: "truediv_195" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_195" + op: "Sub" + input: "bert/encoder/layer_11/output/dense/bias/read" + input: "mul_1052" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_781" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/bias" + input: "sub_195" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: 
false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_782" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/bias/adam_m" + input: "add_658" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_783" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/bias/adam_v" + input: "add_659" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 
768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1053/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_1053" + op: "Mul" + input: "Mul_1053/x" + input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1054/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_1054" + op: "Mul" + input: "Mul_1054/x" + input: "clip_by_global_norm/clip_by_global_norm/_195" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_661" + op: "Add" + input: "Mul_1053" + input: "Mul_1054" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1055/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 
0.9990000128746033 + } + } + } +} +node { + name: "Mul_1055" + op: "Mul" + input: "Mul_1055/x" + input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_195" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_195" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1056/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_1056" + op: "Mul" + input: "Mul_1056/x" + input: "Square_195" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_662" + op: "Add" + input: "Mul_1055" + input: "Mul_1056" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_195" + op: "Sqrt" + input: "add_662" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_663/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_663" + op: "Add" + input: "Sqrt_195" + input: "add_663/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_196" + op: "RealDiv" + input: "add_661" + input: "add_663" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1057" + op: "Mul" + input: "add" + input: "truediv_196" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_196" + op: "Sub" + input: "bert/encoder/layer_11/output/LayerNorm/beta/read" + input: "mul_1057" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_784" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/beta" + input: "sub_196" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_785" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + input: "add_661" + attr { + 
key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_786" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + input: "add_662" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } 
+ } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1058/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_1058" + op: "Mul" + input: "Mul_1058/x" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1059/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_1059" + op: "Mul" + input: "Mul_1059/x" + input: "clip_by_global_norm/clip_by_global_norm/_196" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_664" + op: "Add" + input: "Mul_1058" + input: "Mul_1059" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1060/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_1060" + op: "Mul" + input: "Mul_1060/x" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v/read" + attr { + key: 
"T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_196" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_196" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1061/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_1061" + op: "Mul" + input: "Mul_1061/x" + input: "Square_196" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_665" + op: "Add" + input: "Mul_1060" + input: "Mul_1061" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_196" + op: "Sqrt" + input: "add_665" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_666/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_666" + op: "Add" + input: "Sqrt_196" + input: "add_666/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_197" + op: "RealDiv" + input: "add_664" + input: "add_666" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1062" + op: "Mul" + input: "add" + input: "truediv_197" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_197" + op: "Sub" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/read" + input: "mul_1062" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_787" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/gamma" + input: "sub_197" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_788" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + input: "add_664" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + 
key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_789" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + input: "add_665" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/pooler/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/pooler/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/pooler/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/pooler/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/pooler/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/pooler/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/pooler/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/pooler/dense/kernel/adam_m" + input: "bert/pooler/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/pooler/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/pooler/dense/kernel/adam_m" + 
attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/pooler/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/pooler/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/pooler/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/pooler/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/pooler/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/pooler/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/pooler/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/pooler/dense/kernel/adam_v" + input: "bert/pooler/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/pooler/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/pooler/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1063/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + 
} + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_1063" + op: "Mul" + input: "Mul_1063/x" + input: "bert/pooler/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1064/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_1064" + op: "Mul" + input: "Mul_1064/x" + input: "clip_by_global_norm/clip_by_global_norm/_197" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_667" + op: "Add" + input: "Mul_1063" + input: "Mul_1064" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1065/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_1065" + op: "Mul" + input: "Mul_1065/x" + input: "bert/pooler/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_197" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_197" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1066/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_1066" + op: "Mul" + input: "Mul_1066/x" + input: "Square_197" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_668" + op: "Add" + input: "Mul_1065" + input: "Mul_1066" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_197" + op: "Sqrt" + input: "add_668" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_669/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 
9.999999974752427e-07 + } + } + } +} +node { + name: "add_669" + op: "Add" + input: "Sqrt_197" + input: "add_669/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_198" + op: "RealDiv" + input: "add_667" + input: "add_669" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1067/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_1067" + op: "Mul" + input: "mul_1067/x" + input: "bert/pooler/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_670" + op: "Add" + input: "truediv_198" + input: "mul_1067" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1068" + op: "Mul" + input: "add" + input: "add_670" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_198" + op: "Sub" + input: "bert/pooler/dense/kernel/read" + input: "mul_1068" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_790" + op: "Assign" + input: "bert/pooler/dense/kernel" + input: "sub_198" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_791" + op: "Assign" + input: "bert/pooler/dense/kernel/adam_m" + input: "add_667" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_792" + op: "Assign" + input: "bert/pooler/dense/kernel/adam_v" + input: "add_668" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/pooler/dense/bias/adam_m/Initializer/zeros" 
+ op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/pooler/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/pooler/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/pooler/dense/bias/adam_m" + input: "bert/pooler/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/pooler/dense/bias/adam_m/read" + op: "Identity" + input: "bert/pooler/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/pooler/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/pooler/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/pooler/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/pooler/dense/bias/adam_v" + input: "bert/pooler/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/pooler/dense/bias/adam_v/read" + op: "Identity" + input: "bert/pooler/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } 
+ } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1069/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_1069" + op: "Mul" + input: "Mul_1069/x" + input: "bert/pooler/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1070/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_1070" + op: "Mul" + input: "Mul_1070/x" + input: "clip_by_global_norm/clip_by_global_norm/_198" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_671" + op: "Add" + input: "Mul_1069" + input: "Mul_1070" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1071/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_1071" + op: "Mul" + input: "Mul_1071/x" + input: "bert/pooler/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_198" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_198" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1072/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_1072" + op: "Mul" + input: "Mul_1072/x" + input: "Square_198" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_672" + op: "Add" + input: "Mul_1071" + input: "Mul_1072" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_198" + op: "Sqrt" + input: "add_672" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_673/y" + op: "Const" + attr { + key: "_output_shapes" + value { + 
list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_673" + op: "Add" + input: "Sqrt_198" + input: "add_673/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_199" + op: "RealDiv" + input: "add_671" + input: "add_673" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1073" + op: "Mul" + input: "add" + input: "truediv_199" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_199" + op: "Sub" + input: "bert/pooler/dense/bias/read" + input: "mul_1073" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_793" + op: "Assign" + input: "bert/pooler/dense/bias" + input: "sub_199" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_794" + op: "Assign" + input: "bert/pooler/dense/bias/adam_m" + input: "add_671" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_795" + op: "Assign" + input: "bert/pooler/dense/bias/adam_v" + input: "add_672" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "output_weights/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@output_weights/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\003\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "output_weights/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@output_weights/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + 
} + } + } +} +node { + name: "output_weights/adam_m/Initializer/zeros" + op: "Fill" + input: "output_weights/adam_m/Initializer/zeros/shape_as_tensor" + input: "output_weights/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@output_weights/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "output_weights/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@output_weights/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "output_weights/adam_m/Assign" + op: "Assign" + input: "output_weights/adam_m" + input: "output_weights/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@output_weights/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "output_weights/adam_m/read" + op: "Identity" + input: "output_weights/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@output_weights/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "output_weights/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@output_weights/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\003\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "output_weights/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@output_weights/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "output_weights/adam_v/Initializer/zeros" + op: "Fill" + input: "output_weights/adam_v/Initializer/zeros/shape_as_tensor" + input: "output_weights/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@output_weights/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "output_weights/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + 
list { + s: "loc:@output_weights/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "output_weights/adam_v/Assign" + op: "Assign" + input: "output_weights/adam_v" + input: "output_weights/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@output_weights/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "output_weights/adam_v/read" + op: "Identity" + input: "output_weights/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@output_weights/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1074/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_1074" + op: "Mul" + input: "Mul_1074/x" + input: "output_weights/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1075/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_1075" + op: "Mul" + input: "Mul_1075/x" + input: "clip_by_global_norm/clip_by_global_norm/_199" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_674" + op: "Add" + input: "Mul_1074" + input: "Mul_1075" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1076/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_1076" + op: "Mul" + input: "Mul_1076/x" + input: "output_weights/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_199" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_199" + attr { + key: "T" + value { + type: 
DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1077/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_1077" + op: "Mul" + input: "Mul_1077/x" + input: "Square_199" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_675" + op: "Add" + input: "Mul_1076" + input: "Mul_1077" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_199" + op: "Sqrt" + input: "add_675" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_676/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_676" + op: "Add" + input: "Sqrt_199" + input: "add_676/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_200" + op: "RealDiv" + input: "add_674" + input: "add_676" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1078/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_1078" + op: "Mul" + input: "mul_1078/x" + input: "output_weights/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_677" + op: "Add" + input: "truediv_200" + input: "mul_1078" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1079" + op: "Mul" + input: "add" + input: "add_677" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_200" + op: "Sub" + input: "output_weights/read" + input: "mul_1079" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_796" + op: "Assign" + input: "output_weights" + input: "sub_200" + attr { + key: "T" + 
value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_797" + op: "Assign" + input: "output_weights/adam_m" + input: "add_674" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@output_weights/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_798" + op: "Assign" + input: "output_weights/adam_v" + input: "add_675" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@output_weights/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "output_bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 3 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "output_bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "output_bias/adam_m/Assign" + op: "Assign" + input: "output_bias/adam_m" + input: "output_bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "output_bias/adam_m/read" + op: "Identity" + input: "output_bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "output_bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 3 + } + } + float_val: 
0.0 + } + } + } +} +node { + name: "output_bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "output_bias/adam_v/Assign" + op: "Assign" + input: "output_bias/adam_v" + input: "output_bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "output_bias/adam_v/read" + op: "Identity" + input: "output_bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "Mul_1080/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_1080" + op: "Mul" + input: "Mul_1080/x" + input: "output_bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "Mul_1081/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_1081" + op: "Mul" + input: "Mul_1081/x" + input: "clip_by_global_norm/clip_by_global_norm/_200" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "add_678" + op: "Add" + input: "Mul_1080" + input: "Mul_1081" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "Mul_1082/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_1082" + op: "Mul" + input: "Mul_1082/x" + input: "output_bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "Square_200" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_200" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } 
+} +node { + name: "Mul_1083/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_1083" + op: "Mul" + input: "Mul_1083/x" + input: "Square_200" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "add_679" + op: "Add" + input: "Mul_1082" + input: "Mul_1083" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "Sqrt_200" + op: "Sqrt" + input: "add_679" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "add_680/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_680" + op: "Add" + input: "Sqrt_200" + input: "add_680/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "truediv_201" + op: "RealDiv" + input: "add_678" + input: "add_680" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "mul_1084" + op: "Mul" + input: "add" + input: "truediv_201" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "sub_201" + op: "Sub" + input: "output_bias/read" + input: "mul_1084" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "Assign_799" + op: "Assign" + input: "output_bias" + input: "sub_201" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_800" + op: "Assign" + input: "output_bias/adam_m" + input: "add_678" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_801" + op: "Assign" + input: "output_bias/adam_v" + input: "add_679" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "use_locking" + 
value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "group_deps" + op: "NoOp" + input: "^Assign_199" + input: "^Assign_200" + input: "^Assign_201" + input: "^Assign_202" + input: "^Assign_203" + input: "^Assign_204" + input: "^Assign_205" + input: "^Assign_206" + input: "^Assign_207" + input: "^Assign_208" + input: "^Assign_209" + input: "^Assign_210" + input: "^Assign_211" + input: "^Assign_212" + input: "^Assign_213" + input: "^Assign_214" + input: "^Assign_215" + input: "^Assign_216" + input: "^Assign_217" + input: "^Assign_218" + input: "^Assign_219" + input: "^Assign_220" + input: "^Assign_221" + input: "^Assign_222" + input: "^Assign_223" + input: "^Assign_224" + input: "^Assign_225" + input: "^Assign_226" + input: "^Assign_227" + input: "^Assign_228" + input: "^Assign_229" + input: "^Assign_230" + input: "^Assign_231" + input: "^Assign_232" + input: "^Assign_233" + input: "^Assign_234" + input: "^Assign_235" + input: "^Assign_236" + input: "^Assign_237" + input: "^Assign_238" + input: "^Assign_239" + input: "^Assign_240" + input: "^Assign_241" + input: "^Assign_242" + input: "^Assign_243" + input: "^Assign_244" + input: "^Assign_245" + input: "^Assign_246" + input: "^Assign_247" + input: "^Assign_248" + input: "^Assign_249" + input: "^Assign_250" + input: "^Assign_251" + input: "^Assign_252" + input: "^Assign_253" + input: "^Assign_254" + input: "^Assign_255" + input: "^Assign_256" + input: "^Assign_257" + input: "^Assign_258" + input: "^Assign_259" + input: "^Assign_260" + input: "^Assign_261" + input: "^Assign_262" + input: "^Assign_263" + input: "^Assign_264" + input: "^Assign_265" + input: "^Assign_266" + input: "^Assign_267" + input: "^Assign_268" + input: "^Assign_269" + input: "^Assign_270" + input: "^Assign_271" + input: "^Assign_272" + input: "^Assign_273" + input: "^Assign_274" + input: "^Assign_275" + input: "^Assign_276" + input: "^Assign_277" + input: "^Assign_278" + input: "^Assign_279" + input: "^Assign_280" + input: "^Assign_281" + input: "^Assign_282" + input: "^Assign_283" + input: "^Assign_284" + input: "^Assign_285" + input: "^Assign_286" + input: "^Assign_287" + input: "^Assign_288" + input: "^Assign_289" + input: "^Assign_290" + input: "^Assign_291" + input: "^Assign_292" + input: "^Assign_293" + input: "^Assign_294" + input: "^Assign_295" + input: "^Assign_296" + input: "^Assign_297" + input: "^Assign_298" + input: "^Assign_299" + input: "^Assign_300" + input: "^Assign_301" + input: "^Assign_302" + input: "^Assign_303" + input: "^Assign_304" + input: "^Assign_305" + input: "^Assign_306" + input: "^Assign_307" + input: "^Assign_308" + input: "^Assign_309" + input: "^Assign_310" + input: "^Assign_311" + input: "^Assign_312" + input: "^Assign_313" + input: "^Assign_314" + input: "^Assign_315" + input: "^Assign_316" + input: "^Assign_317" + input: "^Assign_318" + input: "^Assign_319" + input: "^Assign_320" + input: "^Assign_321" + input: "^Assign_322" + input: "^Assign_323" + input: "^Assign_324" + input: "^Assign_325" + input: "^Assign_326" + input: "^Assign_327" + input: "^Assign_328" + input: "^Assign_329" + input: "^Assign_330" + input: "^Assign_331" + input: "^Assign_332" + input: "^Assign_333" + input: "^Assign_334" + input: "^Assign_335" + input: "^Assign_336" + input: "^Assign_337" + input: "^Assign_338" + input: "^Assign_339" + input: "^Assign_340" + input: "^Assign_341" + input: "^Assign_342" + input: "^Assign_343" + input: "^Assign_344" + input: "^Assign_345" + input: "^Assign_346" + input: 
"^Assign_347" + input: "^Assign_348" + input: "^Assign_349" + input: "^Assign_350" + input: "^Assign_351" + input: "^Assign_352" + input: "^Assign_353" + input: "^Assign_354" + input: "^Assign_355" + input: "^Assign_356" + input: "^Assign_357" + input: "^Assign_358" + input: "^Assign_359" + input: "^Assign_360" + input: "^Assign_361" + input: "^Assign_362" + input: "^Assign_363" + input: "^Assign_364" + input: "^Assign_365" + input: "^Assign_366" + input: "^Assign_367" + input: "^Assign_368" + input: "^Assign_369" + input: "^Assign_370" + input: "^Assign_371" + input: "^Assign_372" + input: "^Assign_373" + input: "^Assign_374" + input: "^Assign_375" + input: "^Assign_376" + input: "^Assign_377" + input: "^Assign_378" + input: "^Assign_379" + input: "^Assign_380" + input: "^Assign_381" + input: "^Assign_382" + input: "^Assign_383" + input: "^Assign_384" + input: "^Assign_385" + input: "^Assign_386" + input: "^Assign_387" + input: "^Assign_388" + input: "^Assign_389" + input: "^Assign_390" + input: "^Assign_391" + input: "^Assign_392" + input: "^Assign_393" + input: "^Assign_394" + input: "^Assign_395" + input: "^Assign_396" + input: "^Assign_397" + input: "^Assign_398" + input: "^Assign_399" + input: "^Assign_400" + input: "^Assign_401" + input: "^Assign_402" + input: "^Assign_403" + input: "^Assign_404" + input: "^Assign_405" + input: "^Assign_406" + input: "^Assign_407" + input: "^Assign_408" + input: "^Assign_409" + input: "^Assign_410" + input: "^Assign_411" + input: "^Assign_412" + input: "^Assign_413" + input: "^Assign_414" + input: "^Assign_415" + input: "^Assign_416" + input: "^Assign_417" + input: "^Assign_418" + input: "^Assign_419" + input: "^Assign_420" + input: "^Assign_421" + input: "^Assign_422" + input: "^Assign_423" + input: "^Assign_424" + input: "^Assign_425" + input: "^Assign_426" + input: "^Assign_427" + input: "^Assign_428" + input: "^Assign_429" + input: "^Assign_430" + input: "^Assign_431" + input: "^Assign_432" + input: "^Assign_433" + input: "^Assign_434" + input: "^Assign_435" + input: "^Assign_436" + input: "^Assign_437" + input: "^Assign_438" + input: "^Assign_439" + input: "^Assign_440" + input: "^Assign_441" + input: "^Assign_442" + input: "^Assign_443" + input: "^Assign_444" + input: "^Assign_445" + input: "^Assign_446" + input: "^Assign_447" + input: "^Assign_448" + input: "^Assign_449" + input: "^Assign_450" + input: "^Assign_451" + input: "^Assign_452" + input: "^Assign_453" + input: "^Assign_454" + input: "^Assign_455" + input: "^Assign_456" + input: "^Assign_457" + input: "^Assign_458" + input: "^Assign_459" + input: "^Assign_460" + input: "^Assign_461" + input: "^Assign_462" + input: "^Assign_463" + input: "^Assign_464" + input: "^Assign_465" + input: "^Assign_466" + input: "^Assign_467" + input: "^Assign_468" + input: "^Assign_469" + input: "^Assign_470" + input: "^Assign_471" + input: "^Assign_472" + input: "^Assign_473" + input: "^Assign_474" + input: "^Assign_475" + input: "^Assign_476" + input: "^Assign_477" + input: "^Assign_478" + input: "^Assign_479" + input: "^Assign_480" + input: "^Assign_481" + input: "^Assign_482" + input: "^Assign_483" + input: "^Assign_484" + input: "^Assign_485" + input: "^Assign_486" + input: "^Assign_487" + input: "^Assign_488" + input: "^Assign_489" + input: "^Assign_490" + input: "^Assign_491" + input: "^Assign_492" + input: "^Assign_493" + input: "^Assign_494" + input: "^Assign_495" + input: "^Assign_496" + input: "^Assign_497" + input: "^Assign_498" + input: "^Assign_499" + input: "^Assign_500" + input: 
"^Assign_501" + input: "^Assign_502" + input: "^Assign_503" + input: "^Assign_504" + input: "^Assign_505" + input: "^Assign_506" + input: "^Assign_507" + input: "^Assign_508" + input: "^Assign_509" + input: "^Assign_510" + input: "^Assign_511" + input: "^Assign_512" + input: "^Assign_513" + input: "^Assign_514" + input: "^Assign_515" + input: "^Assign_516" + input: "^Assign_517" + input: "^Assign_518" + input: "^Assign_519" + input: "^Assign_520" + input: "^Assign_521" + input: "^Assign_522" + input: "^Assign_523" + input: "^Assign_524" + input: "^Assign_525" + input: "^Assign_526" + input: "^Assign_527" + input: "^Assign_528" + input: "^Assign_529" + input: "^Assign_530" + input: "^Assign_531" + input: "^Assign_532" + input: "^Assign_533" + input: "^Assign_534" + input: "^Assign_535" + input: "^Assign_536" + input: "^Assign_537" + input: "^Assign_538" + input: "^Assign_539" + input: "^Assign_540" + input: "^Assign_541" + input: "^Assign_542" + input: "^Assign_543" + input: "^Assign_544" + input: "^Assign_545" + input: "^Assign_546" + input: "^Assign_547" + input: "^Assign_548" + input: "^Assign_549" + input: "^Assign_550" + input: "^Assign_551" + input: "^Assign_552" + input: "^Assign_553" + input: "^Assign_554" + input: "^Assign_555" + input: "^Assign_556" + input: "^Assign_557" + input: "^Assign_558" + input: "^Assign_559" + input: "^Assign_560" + input: "^Assign_561" + input: "^Assign_562" + input: "^Assign_563" + input: "^Assign_564" + input: "^Assign_565" + input: "^Assign_566" + input: "^Assign_567" + input: "^Assign_568" + input: "^Assign_569" + input: "^Assign_570" + input: "^Assign_571" + input: "^Assign_572" + input: "^Assign_573" + input: "^Assign_574" + input: "^Assign_575" + input: "^Assign_576" + input: "^Assign_577" + input: "^Assign_578" + input: "^Assign_579" + input: "^Assign_580" + input: "^Assign_581" + input: "^Assign_582" + input: "^Assign_583" + input: "^Assign_584" + input: "^Assign_585" + input: "^Assign_586" + input: "^Assign_587" + input: "^Assign_588" + input: "^Assign_589" + input: "^Assign_590" + input: "^Assign_591" + input: "^Assign_592" + input: "^Assign_593" + input: "^Assign_594" + input: "^Assign_595" + input: "^Assign_596" + input: "^Assign_597" + input: "^Assign_598" + input: "^Assign_599" + input: "^Assign_600" + input: "^Assign_601" + input: "^Assign_602" + input: "^Assign_603" + input: "^Assign_604" + input: "^Assign_605" + input: "^Assign_606" + input: "^Assign_607" + input: "^Assign_608" + input: "^Assign_609" + input: "^Assign_610" + input: "^Assign_611" + input: "^Assign_612" + input: "^Assign_613" + input: "^Assign_614" + input: "^Assign_615" + input: "^Assign_616" + input: "^Assign_617" + input: "^Assign_618" + input: "^Assign_619" + input: "^Assign_620" + input: "^Assign_621" + input: "^Assign_622" + input: "^Assign_623" + input: "^Assign_624" + input: "^Assign_625" + input: "^Assign_626" + input: "^Assign_627" + input: "^Assign_628" + input: "^Assign_629" + input: "^Assign_630" + input: "^Assign_631" + input: "^Assign_632" + input: "^Assign_633" + input: "^Assign_634" + input: "^Assign_635" + input: "^Assign_636" + input: "^Assign_637" + input: "^Assign_638" + input: "^Assign_639" + input: "^Assign_640" + input: "^Assign_641" + input: "^Assign_642" + input: "^Assign_643" + input: "^Assign_644" + input: "^Assign_645" + input: "^Assign_646" + input: "^Assign_647" + input: "^Assign_648" + input: "^Assign_649" + input: "^Assign_650" + input: "^Assign_651" + input: "^Assign_652" + input: "^Assign_653" + input: "^Assign_654" + input: 
"^Assign_655" + input: "^Assign_656" + input: "^Assign_657" + input: "^Assign_658" + input: "^Assign_659" + input: "^Assign_660" + input: "^Assign_661" + input: "^Assign_662" + input: "^Assign_663" + input: "^Assign_664" + input: "^Assign_665" + input: "^Assign_666" + input: "^Assign_667" + input: "^Assign_668" + input: "^Assign_669" + input: "^Assign_670" + input: "^Assign_671" + input: "^Assign_672" + input: "^Assign_673" + input: "^Assign_674" + input: "^Assign_675" + input: "^Assign_676" + input: "^Assign_677" + input: "^Assign_678" + input: "^Assign_679" + input: "^Assign_680" + input: "^Assign_681" + input: "^Assign_682" + input: "^Assign_683" + input: "^Assign_684" + input: "^Assign_685" + input: "^Assign_686" + input: "^Assign_687" + input: "^Assign_688" + input: "^Assign_689" + input: "^Assign_690" + input: "^Assign_691" + input: "^Assign_692" + input: "^Assign_693" + input: "^Assign_694" + input: "^Assign_695" + input: "^Assign_696" + input: "^Assign_697" + input: "^Assign_698" + input: "^Assign_699" + input: "^Assign_700" + input: "^Assign_701" + input: "^Assign_702" + input: "^Assign_703" + input: "^Assign_704" + input: "^Assign_705" + input: "^Assign_706" + input: "^Assign_707" + input: "^Assign_708" + input: "^Assign_709" + input: "^Assign_710" + input: "^Assign_711" + input: "^Assign_712" + input: "^Assign_713" + input: "^Assign_714" + input: "^Assign_715" + input: "^Assign_716" + input: "^Assign_717" + input: "^Assign_718" + input: "^Assign_719" + input: "^Assign_720" + input: "^Assign_721" + input: "^Assign_722" + input: "^Assign_723" + input: "^Assign_724" + input: "^Assign_725" + input: "^Assign_726" + input: "^Assign_727" + input: "^Assign_728" + input: "^Assign_729" + input: "^Assign_730" + input: "^Assign_731" + input: "^Assign_732" + input: "^Assign_733" + input: "^Assign_734" + input: "^Assign_735" + input: "^Assign_736" + input: "^Assign_737" + input: "^Assign_738" + input: "^Assign_739" + input: "^Assign_740" + input: "^Assign_741" + input: "^Assign_742" + input: "^Assign_743" + input: "^Assign_744" + input: "^Assign_745" + input: "^Assign_746" + input: "^Assign_747" + input: "^Assign_748" + input: "^Assign_749" + input: "^Assign_750" + input: "^Assign_751" + input: "^Assign_752" + input: "^Assign_753" + input: "^Assign_754" + input: "^Assign_755" + input: "^Assign_756" + input: "^Assign_757" + input: "^Assign_758" + input: "^Assign_759" + input: "^Assign_760" + input: "^Assign_761" + input: "^Assign_762" + input: "^Assign_763" + input: "^Assign_764" + input: "^Assign_765" + input: "^Assign_766" + input: "^Assign_767" + input: "^Assign_768" + input: "^Assign_769" + input: "^Assign_770" + input: "^Assign_771" + input: "^Assign_772" + input: "^Assign_773" + input: "^Assign_774" + input: "^Assign_775" + input: "^Assign_776" + input: "^Assign_777" + input: "^Assign_778" + input: "^Assign_779" + input: "^Assign_780" + input: "^Assign_781" + input: "^Assign_782" + input: "^Assign_783" + input: "^Assign_784" + input: "^Assign_785" + input: "^Assign_786" + input: "^Assign_787" + input: "^Assign_788" + input: "^Assign_789" + input: "^Assign_790" + input: "^Assign_791" + input: "^Assign_792" + input: "^Assign_793" + input: "^Assign_794" + input: "^Assign_795" + input: "^Assign_796" + input: "^Assign_797" + input: "^Assign_798" + input: "^Assign_799" + input: "^Assign_800" + input: "^Assign_801" +} +node { + name: "ReadVariableOp" + op: "ReadVariableOp" + input: "global_step" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: 
"dtype" + value { + type: DT_INT64 + } + } +} +node { + name: "add_681/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 1 + } + } + } +} +node { + name: "add_681" + op: "Add" + input: "ReadVariableOp" + input: "add_681/y" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "AssignVariableOp" + op: "AssignVariableOp" + input: "global_step" + input: "add_681" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } +} +node { + name: "ReadVariableOp_1" + op: "ReadVariableOp" + input: "global_step" + input: "^AssignVariableOp" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } +} +node { + name: "group_deps_1" + op: "NoOp" + input: "^AssignVariableOp" + input: "^group_deps" +} +node { + name: "loss_1/tags" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "loss_1" + } + } + } +} +node { + name: "loss_1" + op: "ScalarSummary" + input: "loss_1/tags" + input: "loss/Mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "init" + op: "NoOp" + input: "^Assign" + input: "^Assign_1" + input: "^Assign_10" + input: "^Assign_100" + input: "^Assign_101" + input: "^Assign_102" + input: "^Assign_103" + input: "^Assign_104" + input: "^Assign_105" + input: "^Assign_106" + input: "^Assign_107" + input: "^Assign_108" + input: "^Assign_109" + input: "^Assign_11" + input: "^Assign_110" + input: "^Assign_111" + input: "^Assign_112" + input: "^Assign_113" + input: "^Assign_114" + input: "^Assign_115" + input: "^Assign_116" + input: "^Assign_117" + input: "^Assign_118" + input: "^Assign_119" + input: "^Assign_12" + input: "^Assign_120" + input: "^Assign_121" + input: "^Assign_122" + input: "^Assign_123" + input: "^Assign_124" + input: "^Assign_125" + input: "^Assign_126" + input: "^Assign_127" + input: "^Assign_128" + input: "^Assign_129" + input: "^Assign_13" + input: "^Assign_130" + input: "^Assign_131" + input: "^Assign_132" + input: "^Assign_133" + input: "^Assign_134" + input: "^Assign_135" + input: "^Assign_136" + input: "^Assign_137" + input: "^Assign_138" + input: "^Assign_139" + input: "^Assign_14" + input: "^Assign_140" + input: "^Assign_141" + input: "^Assign_142" + input: "^Assign_143" + input: "^Assign_144" + input: "^Assign_145" + input: "^Assign_146" + input: "^Assign_147" + input: "^Assign_148" + input: "^Assign_149" + input: "^Assign_15" + input: "^Assign_150" + input: "^Assign_151" + input: "^Assign_152" + input: "^Assign_153" + input: "^Assign_154" + input: "^Assign_155" + input: "^Assign_156" + input: "^Assign_157" + input: "^Assign_158" + input: "^Assign_159" + input: "^Assign_16" + input: "^Assign_160" + input: "^Assign_161" + input: "^Assign_162" + input: "^Assign_163" + input: "^Assign_164" + input: "^Assign_165" + input: "^Assign_166" + input: "^Assign_167" + input: "^Assign_168" + input: "^Assign_169" + input: "^Assign_17" + input: "^Assign_170" + input: "^Assign_171" + input: "^Assign_172" + input: 
"^Assign_173" + input: "^Assign_174" + input: "^Assign_175" + input: "^Assign_176" + input: "^Assign_177" + input: "^Assign_178" + input: "^Assign_179" + input: "^Assign_18" + input: "^Assign_180" + input: "^Assign_181" + input: "^Assign_182" + input: "^Assign_183" + input: "^Assign_184" + input: "^Assign_185" + input: "^Assign_186" + input: "^Assign_187" + input: "^Assign_188" + input: "^Assign_189" + input: "^Assign_19" + input: "^Assign_190" + input: "^Assign_191" + input: "^Assign_192" + input: "^Assign_193" + input: "^Assign_194" + input: "^Assign_195" + input: "^Assign_196" + input: "^Assign_197" + input: "^Assign_198" + input: "^Assign_2" + input: "^Assign_20" + input: "^Assign_21" + input: "^Assign_22" + input: "^Assign_23" + input: "^Assign_24" + input: "^Assign_25" + input: "^Assign_26" + input: "^Assign_27" + input: "^Assign_28" + input: "^Assign_29" + input: "^Assign_3" + input: "^Assign_30" + input: "^Assign_31" + input: "^Assign_32" + input: "^Assign_33" + input: "^Assign_34" + input: "^Assign_35" + input: "^Assign_36" + input: "^Assign_37" + input: "^Assign_38" + input: "^Assign_39" + input: "^Assign_4" + input: "^Assign_40" + input: "^Assign_41" + input: "^Assign_42" + input: "^Assign_43" + input: "^Assign_44" + input: "^Assign_45" + input: "^Assign_46" + input: "^Assign_47" + input: "^Assign_48" + input: "^Assign_49" + input: "^Assign_5" + input: "^Assign_50" + input: "^Assign_51" + input: "^Assign_52" + input: "^Assign_53" + input: "^Assign_54" + input: "^Assign_55" + input: "^Assign_56" + input: "^Assign_57" + input: "^Assign_58" + input: "^Assign_59" + input: "^Assign_6" + input: "^Assign_60" + input: "^Assign_61" + input: "^Assign_62" + input: "^Assign_63" + input: "^Assign_64" + input: "^Assign_65" + input: "^Assign_66" + input: "^Assign_67" + input: "^Assign_68" + input: "^Assign_69" + input: "^Assign_7" + input: "^Assign_70" + input: "^Assign_71" + input: "^Assign_72" + input: "^Assign_73" + input: "^Assign_74" + input: "^Assign_75" + input: "^Assign_76" + input: "^Assign_77" + input: "^Assign_78" + input: "^Assign_79" + input: "^Assign_8" + input: "^Assign_80" + input: "^Assign_81" + input: "^Assign_82" + input: "^Assign_83" + input: "^Assign_84" + input: "^Assign_85" + input: "^Assign_86" + input: "^Assign_87" + input: "^Assign_88" + input: "^Assign_89" + input: "^Assign_9" + input: "^Assign_90" + input: "^Assign_91" + input: "^Assign_92" + input: "^Assign_93" + input: "^Assign_94" + input: "^Assign_95" + input: "^Assign_96" + input: "^Assign_97" + input: "^Assign_98" + input: "^Assign_99" + input: "^bert/embeddings/LayerNorm/beta/adam_m/Assign" + input: "^bert/embeddings/LayerNorm/beta/adam_v/Assign" + input: "^bert/embeddings/LayerNorm/gamma/adam_m/Assign" + input: "^bert/embeddings/LayerNorm/gamma/adam_v/Assign" + input: "^bert/embeddings/position_embeddings/adam_m/Assign" + input: "^bert/embeddings/position_embeddings/adam_v/Assign" + input: "^bert/embeddings/token_type_embeddings/adam_m/Assign" + input: "^bert/embeddings/token_type_embeddings/adam_v/Assign" + input: "^bert/embeddings/word_embeddings/adam_m/Assign" + input: "^bert/embeddings/word_embeddings/adam_v/Assign" + input: "^bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_0/attention/output/dense/bias/adam_m/Assign" + 
input: "^bert/encoder/layer_0/attention/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_0/attention/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_0/attention/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_0/attention/self/key/bias/adam_m/Assign" + input: "^bert/encoder/layer_0/attention/self/key/bias/adam_v/Assign" + input: "^bert/encoder/layer_0/attention/self/key/kernel/adam_m/Assign" + input: "^bert/encoder/layer_0/attention/self/key/kernel/adam_v/Assign" + input: "^bert/encoder/layer_0/attention/self/query/bias/adam_m/Assign" + input: "^bert/encoder/layer_0/attention/self/query/bias/adam_v/Assign" + input: "^bert/encoder/layer_0/attention/self/query/kernel/adam_m/Assign" + input: "^bert/encoder/layer_0/attention/self/query/kernel/adam_v/Assign" + input: "^bert/encoder/layer_0/attention/self/value/bias/adam_m/Assign" + input: "^bert/encoder/layer_0/attention/self/value/bias/adam_v/Assign" + input: "^bert/encoder/layer_0/attention/self/value/kernel/adam_m/Assign" + input: "^bert/encoder/layer_0/attention/self/value/kernel/adam_v/Assign" + input: "^bert/encoder/layer_0/intermediate/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_0/intermediate/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_0/intermediate/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_0/intermediate/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_0/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_0/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_0/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_0/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_0/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_0/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_0/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_0/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_1/attention/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_1/attention/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_1/attention/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_1/attention/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_1/attention/self/key/bias/adam_m/Assign" + input: "^bert/encoder/layer_1/attention/self/key/bias/adam_v/Assign" + input: "^bert/encoder/layer_1/attention/self/key/kernel/adam_m/Assign" + input: "^bert/encoder/layer_1/attention/self/key/kernel/adam_v/Assign" + input: "^bert/encoder/layer_1/attention/self/query/bias/adam_m/Assign" + input: "^bert/encoder/layer_1/attention/self/query/bias/adam_v/Assign" + input: "^bert/encoder/layer_1/attention/self/query/kernel/adam_m/Assign" + input: "^bert/encoder/layer_1/attention/self/query/kernel/adam_v/Assign" + input: "^bert/encoder/layer_1/attention/self/value/bias/adam_m/Assign" + input: "^bert/encoder/layer_1/attention/self/value/bias/adam_v/Assign" + input: "^bert/encoder/layer_1/attention/self/value/kernel/adam_m/Assign" + input: "^bert/encoder/layer_1/attention/self/value/kernel/adam_v/Assign" + input: "^bert/encoder/layer_1/intermediate/dense/bias/adam_m/Assign" + input: 
"^bert/encoder/layer_1/intermediate/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_1/intermediate/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_1/intermediate/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_1/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_1/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_1/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_1/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_1/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_1/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_1/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_1/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_10/attention/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_10/attention/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_10/attention/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_10/attention/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_10/attention/self/key/bias/adam_m/Assign" + input: "^bert/encoder/layer_10/attention/self/key/bias/adam_v/Assign" + input: "^bert/encoder/layer_10/attention/self/key/kernel/adam_m/Assign" + input: "^bert/encoder/layer_10/attention/self/key/kernel/adam_v/Assign" + input: "^bert/encoder/layer_10/attention/self/query/bias/adam_m/Assign" + input: "^bert/encoder/layer_10/attention/self/query/bias/adam_v/Assign" + input: "^bert/encoder/layer_10/attention/self/query/kernel/adam_m/Assign" + input: "^bert/encoder/layer_10/attention/self/query/kernel/adam_v/Assign" + input: "^bert/encoder/layer_10/attention/self/value/bias/adam_m/Assign" + input: "^bert/encoder/layer_10/attention/self/value/bias/adam_v/Assign" + input: "^bert/encoder/layer_10/attention/self/value/kernel/adam_m/Assign" + input: "^bert/encoder/layer_10/attention/self/value/kernel/adam_v/Assign" + input: "^bert/encoder/layer_10/intermediate/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_10/intermediate/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_10/intermediate/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_10/intermediate/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_10/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_10/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_10/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_10/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_10/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_10/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_10/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_10/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_11/attention/output/dense/bias/adam_m/Assign" + input: 
"^bert/encoder/layer_11/attention/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_11/attention/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_11/attention/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_11/attention/self/key/bias/adam_m/Assign" + input: "^bert/encoder/layer_11/attention/self/key/bias/adam_v/Assign" + input: "^bert/encoder/layer_11/attention/self/key/kernel/adam_m/Assign" + input: "^bert/encoder/layer_11/attention/self/key/kernel/adam_v/Assign" + input: "^bert/encoder/layer_11/attention/self/query/bias/adam_m/Assign" + input: "^bert/encoder/layer_11/attention/self/query/bias/adam_v/Assign" + input: "^bert/encoder/layer_11/attention/self/query/kernel/adam_m/Assign" + input: "^bert/encoder/layer_11/attention/self/query/kernel/adam_v/Assign" + input: "^bert/encoder/layer_11/attention/self/value/bias/adam_m/Assign" + input: "^bert/encoder/layer_11/attention/self/value/bias/adam_v/Assign" + input: "^bert/encoder/layer_11/attention/self/value/kernel/adam_m/Assign" + input: "^bert/encoder/layer_11/attention/self/value/kernel/adam_v/Assign" + input: "^bert/encoder/layer_11/intermediate/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_11/intermediate/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_11/intermediate/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_11/intermediate/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_11/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_11/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_11/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_11/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_11/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_11/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_11/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_11/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_2/attention/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_2/attention/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_2/attention/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_2/attention/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_2/attention/self/key/bias/adam_m/Assign" + input: "^bert/encoder/layer_2/attention/self/key/bias/adam_v/Assign" + input: "^bert/encoder/layer_2/attention/self/key/kernel/adam_m/Assign" + input: "^bert/encoder/layer_2/attention/self/key/kernel/adam_v/Assign" + input: "^bert/encoder/layer_2/attention/self/query/bias/adam_m/Assign" + input: "^bert/encoder/layer_2/attention/self/query/bias/adam_v/Assign" + input: "^bert/encoder/layer_2/attention/self/query/kernel/adam_m/Assign" + input: "^bert/encoder/layer_2/attention/self/query/kernel/adam_v/Assign" + input: "^bert/encoder/layer_2/attention/self/value/bias/adam_m/Assign" + input: "^bert/encoder/layer_2/attention/self/value/bias/adam_v/Assign" + input: "^bert/encoder/layer_2/attention/self/value/kernel/adam_m/Assign" + input: "^bert/encoder/layer_2/attention/self/value/kernel/adam_v/Assign" + input: "^bert/encoder/layer_2/intermediate/dense/bias/adam_m/Assign" + input: 
"^bert/encoder/layer_2/intermediate/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_2/intermediate/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_2/intermediate/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_2/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_2/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_2/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_2/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_2/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_2/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_2/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_2/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_3/attention/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_3/attention/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_3/attention/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_3/attention/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_3/attention/self/key/bias/adam_m/Assign" + input: "^bert/encoder/layer_3/attention/self/key/bias/adam_v/Assign" + input: "^bert/encoder/layer_3/attention/self/key/kernel/adam_m/Assign" + input: "^bert/encoder/layer_3/attention/self/key/kernel/adam_v/Assign" + input: "^bert/encoder/layer_3/attention/self/query/bias/adam_m/Assign" + input: "^bert/encoder/layer_3/attention/self/query/bias/adam_v/Assign" + input: "^bert/encoder/layer_3/attention/self/query/kernel/adam_m/Assign" + input: "^bert/encoder/layer_3/attention/self/query/kernel/adam_v/Assign" + input: "^bert/encoder/layer_3/attention/self/value/bias/adam_m/Assign" + input: "^bert/encoder/layer_3/attention/self/value/bias/adam_v/Assign" + input: "^bert/encoder/layer_3/attention/self/value/kernel/adam_m/Assign" + input: "^bert/encoder/layer_3/attention/self/value/kernel/adam_v/Assign" + input: "^bert/encoder/layer_3/intermediate/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_3/intermediate/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_3/intermediate/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_3/intermediate/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_3/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_3/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_3/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_3/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_3/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_3/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_3/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_3/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_4/attention/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_4/attention/output/dense/bias/adam_v/Assign" + 
input: "^bert/encoder/layer_4/attention/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_4/attention/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_4/attention/self/key/bias/adam_m/Assign" + input: "^bert/encoder/layer_4/attention/self/key/bias/adam_v/Assign" + input: "^bert/encoder/layer_4/attention/self/key/kernel/adam_m/Assign" + input: "^bert/encoder/layer_4/attention/self/key/kernel/adam_v/Assign" + input: "^bert/encoder/layer_4/attention/self/query/bias/adam_m/Assign" + input: "^bert/encoder/layer_4/attention/self/query/bias/adam_v/Assign" + input: "^bert/encoder/layer_4/attention/self/query/kernel/adam_m/Assign" + input: "^bert/encoder/layer_4/attention/self/query/kernel/adam_v/Assign" + input: "^bert/encoder/layer_4/attention/self/value/bias/adam_m/Assign" + input: "^bert/encoder/layer_4/attention/self/value/bias/adam_v/Assign" + input: "^bert/encoder/layer_4/attention/self/value/kernel/adam_m/Assign" + input: "^bert/encoder/layer_4/attention/self/value/kernel/adam_v/Assign" + input: "^bert/encoder/layer_4/intermediate/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_4/intermediate/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_4/intermediate/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_4/intermediate/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_4/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_4/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_4/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_4/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_4/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_4/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_4/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_4/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_5/attention/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_5/attention/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_5/attention/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_5/attention/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_5/attention/self/key/bias/adam_m/Assign" + input: "^bert/encoder/layer_5/attention/self/key/bias/adam_v/Assign" + input: "^bert/encoder/layer_5/attention/self/key/kernel/adam_m/Assign" + input: "^bert/encoder/layer_5/attention/self/key/kernel/adam_v/Assign" + input: "^bert/encoder/layer_5/attention/self/query/bias/adam_m/Assign" + input: "^bert/encoder/layer_5/attention/self/query/bias/adam_v/Assign" + input: "^bert/encoder/layer_5/attention/self/query/kernel/adam_m/Assign" + input: "^bert/encoder/layer_5/attention/self/query/kernel/adam_v/Assign" + input: "^bert/encoder/layer_5/attention/self/value/bias/adam_m/Assign" + input: "^bert/encoder/layer_5/attention/self/value/bias/adam_v/Assign" + input: "^bert/encoder/layer_5/attention/self/value/kernel/adam_m/Assign" + input: "^bert/encoder/layer_5/attention/self/value/kernel/adam_v/Assign" + input: "^bert/encoder/layer_5/intermediate/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_5/intermediate/dense/bias/adam_v/Assign" + input: 
"^bert/encoder/layer_5/intermediate/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_5/intermediate/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_5/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_5/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_5/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_5/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_5/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_5/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_5/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_5/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_6/attention/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_6/attention/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_6/attention/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_6/attention/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_6/attention/self/key/bias/adam_m/Assign" + input: "^bert/encoder/layer_6/attention/self/key/bias/adam_v/Assign" + input: "^bert/encoder/layer_6/attention/self/key/kernel/adam_m/Assign" + input: "^bert/encoder/layer_6/attention/self/key/kernel/adam_v/Assign" + input: "^bert/encoder/layer_6/attention/self/query/bias/adam_m/Assign" + input: "^bert/encoder/layer_6/attention/self/query/bias/adam_v/Assign" + input: "^bert/encoder/layer_6/attention/self/query/kernel/adam_m/Assign" + input: "^bert/encoder/layer_6/attention/self/query/kernel/adam_v/Assign" + input: "^bert/encoder/layer_6/attention/self/value/bias/adam_m/Assign" + input: "^bert/encoder/layer_6/attention/self/value/bias/adam_v/Assign" + input: "^bert/encoder/layer_6/attention/self/value/kernel/adam_m/Assign" + input: "^bert/encoder/layer_6/attention/self/value/kernel/adam_v/Assign" + input: "^bert/encoder/layer_6/intermediate/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_6/intermediate/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_6/intermediate/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_6/intermediate/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_6/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_6/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_6/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_6/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_6/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_6/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_6/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_6/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_7/attention/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_7/attention/output/dense/bias/adam_v/Assign" + input: 
"^bert/encoder/layer_7/attention/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_7/attention/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_7/attention/self/key/bias/adam_m/Assign" + input: "^bert/encoder/layer_7/attention/self/key/bias/adam_v/Assign" + input: "^bert/encoder/layer_7/attention/self/key/kernel/adam_m/Assign" + input: "^bert/encoder/layer_7/attention/self/key/kernel/adam_v/Assign" + input: "^bert/encoder/layer_7/attention/self/query/bias/adam_m/Assign" + input: "^bert/encoder/layer_7/attention/self/query/bias/adam_v/Assign" + input: "^bert/encoder/layer_7/attention/self/query/kernel/adam_m/Assign" + input: "^bert/encoder/layer_7/attention/self/query/kernel/adam_v/Assign" + input: "^bert/encoder/layer_7/attention/self/value/bias/adam_m/Assign" + input: "^bert/encoder/layer_7/attention/self/value/bias/adam_v/Assign" + input: "^bert/encoder/layer_7/attention/self/value/kernel/adam_m/Assign" + input: "^bert/encoder/layer_7/attention/self/value/kernel/adam_v/Assign" + input: "^bert/encoder/layer_7/intermediate/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_7/intermediate/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_7/intermediate/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_7/intermediate/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_7/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_7/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_7/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_7/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_7/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_7/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_7/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_7/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_8/attention/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_8/attention/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_8/attention/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_8/attention/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_8/attention/self/key/bias/adam_m/Assign" + input: "^bert/encoder/layer_8/attention/self/key/bias/adam_v/Assign" + input: "^bert/encoder/layer_8/attention/self/key/kernel/adam_m/Assign" + input: "^bert/encoder/layer_8/attention/self/key/kernel/adam_v/Assign" + input: "^bert/encoder/layer_8/attention/self/query/bias/adam_m/Assign" + input: "^bert/encoder/layer_8/attention/self/query/bias/adam_v/Assign" + input: "^bert/encoder/layer_8/attention/self/query/kernel/adam_m/Assign" + input: "^bert/encoder/layer_8/attention/self/query/kernel/adam_v/Assign" + input: "^bert/encoder/layer_8/attention/self/value/bias/adam_m/Assign" + input: "^bert/encoder/layer_8/attention/self/value/bias/adam_v/Assign" + input: "^bert/encoder/layer_8/attention/self/value/kernel/adam_m/Assign" + input: "^bert/encoder/layer_8/attention/self/value/kernel/adam_v/Assign" + input: "^bert/encoder/layer_8/intermediate/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_8/intermediate/dense/bias/adam_v/Assign" + input: 
"^bert/encoder/layer_8/intermediate/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_8/intermediate/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_8/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_8/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_8/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_8/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_8/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_8/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_8/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_8/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_9/attention/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_9/attention/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_9/attention/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_9/attention/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_9/attention/self/key/bias/adam_m/Assign" + input: "^bert/encoder/layer_9/attention/self/key/bias/adam_v/Assign" + input: "^bert/encoder/layer_9/attention/self/key/kernel/adam_m/Assign" + input: "^bert/encoder/layer_9/attention/self/key/kernel/adam_v/Assign" + input: "^bert/encoder/layer_9/attention/self/query/bias/adam_m/Assign" + input: "^bert/encoder/layer_9/attention/self/query/bias/adam_v/Assign" + input: "^bert/encoder/layer_9/attention/self/query/kernel/adam_m/Assign" + input: "^bert/encoder/layer_9/attention/self/query/kernel/adam_v/Assign" + input: "^bert/encoder/layer_9/attention/self/value/bias/adam_m/Assign" + input: "^bert/encoder/layer_9/attention/self/value/bias/adam_v/Assign" + input: "^bert/encoder/layer_9/attention/self/value/kernel/adam_m/Assign" + input: "^bert/encoder/layer_9/attention/self/value/kernel/adam_v/Assign" + input: "^bert/encoder/layer_9/intermediate/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_9/intermediate/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_9/intermediate/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_9/intermediate/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_9/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_9/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_9/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_9/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_9/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_9/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_9/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_9/output/dense/kernel/adam_v/Assign" + input: "^bert/pooler/dense/bias/adam_m/Assign" + input: "^bert/pooler/dense/bias/adam_v/Assign" + input: "^bert/pooler/dense/kernel/adam_m/Assign" + input: "^bert/pooler/dense/kernel/adam_v/Assign" + input: "^global_step/Assign" + input: "^output_bias/Assign" + input: "^output_bias/adam_m/Assign" + input: "^output_bias/adam_v/Assign" + input: "^output_weights/Assign" + input: "^output_weights/adam_m/Assign" + input: "^output_weights/adam_v/Assign" +} +node { + name: "init_1" + op: "NoOp" +} +node { + name: "group_deps_2" + op: "NoOp" + 
input: "^init" + input: "^init_1" +} +node { + name: "report_uninitialized_variables/VarIsInitializedOp" + op: "VarIsInitializedOp" + input: "global_step" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized" + op: "IsVariableInitialized" + input: "bert/embeddings/word_embeddings" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_1" + op: "IsVariableInitialized" + input: "bert/embeddings/token_type_embeddings" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_2" + op: "IsVariableInitialized" + input: "bert/embeddings/position_embeddings" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_3" + op: "IsVariableInitialized" + input: "bert/embeddings/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_4" + op: "IsVariableInitialized" + input: "bert/embeddings/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_5" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_6" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_7" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_8" + op: "IsVariableInitialized" + input: 
"bert/encoder/layer_0/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_9" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_10" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_11" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_12" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_13" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_14" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_15" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_16" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_17" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_18" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_19" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_20" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_21" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_22" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_23" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_24" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_25" + op: "IsVariableInitialized" + input: 
"bert/encoder/layer_1/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_26" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_27" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_28" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_29" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_30" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_31" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_32" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_33" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_34" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_35" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_36" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_37" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_38" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_39" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_40" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_41" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_42" + op: "IsVariableInitialized" + input: 
"bert/encoder/layer_2/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_43" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_44" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_45" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_46" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_47" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_48" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_49" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_50" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + 
list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_51" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_52" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_53" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_54" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_55" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_56" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_57" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_58" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_59" + op: "IsVariableInitialized" + input: 
"bert/encoder/layer_3/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_60" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_61" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_62" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_63" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_64" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_65" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_66" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_67" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + 
shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_68" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_69" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_70" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_71" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_72" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_73" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_74" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_75" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_76" + op: "IsVariableInitialized" + input: 
"bert/encoder/layer_4/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_77" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_78" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_79" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_80" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_81" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_82" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_83" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_84" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } 
+ } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_85" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_86" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_87" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_88" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_89" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_90" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_91" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_92" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_93" + op: "IsVariableInitialized" + input: 
"bert/encoder/layer_5/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_94" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_95" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_96" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_97" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_98" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_99" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_100" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_101" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + 
} + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_102" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_103" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_104" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_105" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_106" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_107" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_108" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_109" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_110" + op: "IsVariableInitialized" + input: 
"bert/encoder/layer_6/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_111" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_112" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_113" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_114" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_115" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_116" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_117" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_118" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } 
+ } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_119" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_120" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_121" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_122" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_123" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_124" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_125" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_126" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_127" + op: "IsVariableInitialized" + 
input: "bert/encoder/layer_7/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_128" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_129" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_130" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_131" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_132" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_133" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_134" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_135" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + 
attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_136" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_137" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_138" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_139" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_140" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_141" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_142" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_143" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_144" + op: "IsVariableInitialized" + 
input: "bert/encoder/layer_8/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_145" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_146" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_147" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_148" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_149" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_150" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_151" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_152" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr 
{ + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_153" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_154" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_155" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_156" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_157" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_158" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_159" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_160" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_161" + op: "IsVariableInitialized" + input: 
"bert/encoder/layer_9/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_162" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_163" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_164" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_165" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_166" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_167" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_168" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_169" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } 
+ } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_170" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_171" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_172" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_173" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_174" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_175" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_176" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_177" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_178" + op: "IsVariableInitialized" + 
input: "bert/encoder/layer_10/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_179" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_180" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_181" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_182" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_183" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_184" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_185" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_186" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + 
list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_187" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_188" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_189" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_190" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_191" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_192" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_193" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_194" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_195" + op: 
"IsVariableInitialized" + input: "bert/encoder/layer_11/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_196" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_197" + op: "IsVariableInitialized" + input: "bert/pooler/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_198" + op: "IsVariableInitialized" + input: "bert/pooler/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_199" + op: "IsVariableInitialized" + input: "output_weights" + attr { + key: "_class" + value { + list { + s: "loc:@output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_200" + op: "IsVariableInitialized" + input: "output_bias" + attr { + key: "_class" + value { + list { + s: "loc:@output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_201" + op: "IsVariableInitialized" + input: "bert/embeddings/word_embeddings/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_202" + op: "IsVariableInitialized" + input: "bert/embeddings/word_embeddings/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_203" + op: "IsVariableInitialized" + input: "bert/embeddings/token_type_embeddings/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_204" + op: "IsVariableInitialized" + input: 
"bert/embeddings/token_type_embeddings/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_205" + op: "IsVariableInitialized" + input: "bert/embeddings/position_embeddings/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_206" + op: "IsVariableInitialized" + input: "bert/embeddings/position_embeddings/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_207" + op: "IsVariableInitialized" + input: "bert/embeddings/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_208" + op: "IsVariableInitialized" + input: "bert/embeddings/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_209" + op: "IsVariableInitialized" + input: "bert/embeddings/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_210" + op: "IsVariableInitialized" + input: "bert/embeddings/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_211" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_212" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: 
"dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_213" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_214" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_215" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_216" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_217" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_218" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_219" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_220" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: 
"report_uninitialized_variables/IsVariableInitialized_221" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_222" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_223" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_224" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_225" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_226" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_227" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_228" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: 
"report_uninitialized_variables/IsVariableInitialized_229" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_230" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_231" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_232" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_233" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_234" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_235" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_236" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_237" + op: "IsVariableInitialized" + input: 
"bert/encoder/layer_0/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_238" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_239" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_240" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_241" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_242" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_243" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_244" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_245" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_1/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_246" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_247" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_248" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_249" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_250" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_251" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_252" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_253" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_m" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_254" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_255" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_256" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_257" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_258" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_259" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_260" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_261" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { 
+ shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_262" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_263" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_264" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_265" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_266" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_267" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_268" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_269" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: 
"report_uninitialized_variables/IsVariableInitialized_270" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_271" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_272" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_273" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_274" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_275" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_276" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_277" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_278" + op: "IsVariableInitialized" + input: 
"bert/encoder/layer_2/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_279" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_280" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_281" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_282" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_283" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_284" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_285" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_286" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list 
{ + s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_287" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_288" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_289" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_290" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_291" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_292" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_293" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_294" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_295" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_296" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_297" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_298" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_299" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_300" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_301" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_302" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + 
attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_303" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_304" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_305" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_306" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_307" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_308" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_309" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_310" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: 
"report_uninitialized_variables/IsVariableInitialized_311" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_312" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_313" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_314" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_315" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_316" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_317" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_318" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_319" + op: "IsVariableInitialized" + input: 
"bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_320" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_321" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_322" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_323" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_324" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_325" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_326" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_327" + op: "IsVariableInitialized" + input: 
"bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_328" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_329" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_330" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_331" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_332" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_333" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_334" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_335" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + } 
+ } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_336" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_337" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_338" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_339" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_340" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_341" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_342" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_343" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" 
+ value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_344" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_345" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_346" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_347" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_348" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_349" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_350" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_351" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: 
"report_uninitialized_variables/IsVariableInitialized_352" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_353" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_354" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_355" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_356" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_357" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_358" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_359" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: 
"report_uninitialized_variables/IsVariableInitialized_360" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_361" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_362" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_363" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_364" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_365" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_366" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_367" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_368" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + attr { + key: 
"_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_369" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_370" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_371" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_372" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_373" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_374" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_375" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_376" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_v" + } + } + } 
+ attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_377" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_378" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_379" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_380" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_381" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_382" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_383" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_384" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + 
attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_385" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_386" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_387" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_388" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_389" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_390" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_391" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_392" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + 
type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_393" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_394" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_395" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_396" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_397" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_398" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_399" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_400" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_401" + op: "IsVariableInitialized" + input: 
"bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_402" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_403" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_404" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_405" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_406" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_407" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_408" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_409" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_6/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_410" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_411" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_412" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_413" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_414" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_415" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_416" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_417" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_m" + } + } + } + 
attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_418" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_419" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_420" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_421" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_422" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_423" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_424" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_425" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + 
list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_426" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_427" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_428" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_429" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_430" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_431" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_432" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_433" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_434" + op: 
"IsVariableInitialized" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_435" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_436" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_437" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_438" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_439" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_440" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_441" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_442" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/key/bias/adam_v" + attr { + 
key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_443" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_444" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_445" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_446" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_447" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_448" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_449" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_450" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_451" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_452" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_453" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_454" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_455" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_456" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_457" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_458" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_459" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_460" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_461" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_462" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_463" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_464" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_465" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_466" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { 
+ type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_467" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_468" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_469" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_470" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_471" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_472" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_473" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_474" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: 
"report_uninitialized_variables/IsVariableInitialized_475" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_476" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_477" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_478" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_479" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_480" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_481" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_482" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_483" + op: 
"IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_484" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_485" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_486" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_487" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_488" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_489" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_490" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_491" + op: "IsVariableInitialized" + input: 
"bert/encoder/layer_8/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_492" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_493" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_494" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_495" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_496" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_497" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_498" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_499" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_m" + } + } + } 
+ attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_500" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_501" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_502" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_503" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_504" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_505" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_506" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_507" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: 
"dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_508" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_509" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_510" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_511" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_512" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_513" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_514" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_515" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + 
name: "report_uninitialized_variables/IsVariableInitialized_516" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_517" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_518" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_519" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_520" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_521" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_522" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_523" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_524" + op: 
"IsVariableInitialized" + input: "bert/encoder/layer_9/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_525" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_526" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_527" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_528" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_529" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_530" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_531" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_532" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_533" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_534" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_535" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_536" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_537" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_538" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_539" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_540" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_v" + } + } + } + attr 
{ + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_541" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_542" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_543" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_544" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_545" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_546" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_547" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_548" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value 
{ + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_549" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_550" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_551" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_552" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_553" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_554" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_555" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_556" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: 
DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_557" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_558" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_559" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_560" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_561" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_562" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_563" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_564" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_565" + op: "IsVariableInitialized" + input: 
"bert/encoder/layer_11/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_566" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_567" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_568" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_569" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_570" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_571" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_572" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_573" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/value/bias/adam_m" + attr { + key: "_class" 
+ value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_574" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_575" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_576" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_577" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_578" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_579" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_580" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_581" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_582" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_583" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_584" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_585" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_586" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_587" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_588" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_589" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + 
value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_590" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_591" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_592" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_593" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_594" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_595" + op: "IsVariableInitialized" + input: "bert/pooler/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_596" + op: "IsVariableInitialized" + input: "bert/pooler/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_597" + op: "IsVariableInitialized" + input: "bert/pooler/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_598" + op: "IsVariableInitialized" + input: "bert/pooler/dense/bias/adam_v" + attr { + key: 
"_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_599" + op: "IsVariableInitialized" + input: "output_weights/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@output_weights/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_600" + op: "IsVariableInitialized" + input: "output_weights/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@output_weights/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_601" + op: "IsVariableInitialized" + input: "output_bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_602" + op: "IsVariableInitialized" + input: "output_bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/stack" + op: "Pack" + input: "report_uninitialized_variables/VarIsInitializedOp" + input: "report_uninitialized_variables/IsVariableInitialized" + input: "report_uninitialized_variables/IsVariableInitialized_1" + input: "report_uninitialized_variables/IsVariableInitialized_2" + input: "report_uninitialized_variables/IsVariableInitialized_3" + input: "report_uninitialized_variables/IsVariableInitialized_4" + input: "report_uninitialized_variables/IsVariableInitialized_5" + input: "report_uninitialized_variables/IsVariableInitialized_6" + input: "report_uninitialized_variables/IsVariableInitialized_7" + input: "report_uninitialized_variables/IsVariableInitialized_8" + input: "report_uninitialized_variables/IsVariableInitialized_9" + input: "report_uninitialized_variables/IsVariableInitialized_10" + input: "report_uninitialized_variables/IsVariableInitialized_11" + input: "report_uninitialized_variables/IsVariableInitialized_12" + input: "report_uninitialized_variables/IsVariableInitialized_13" + input: "report_uninitialized_variables/IsVariableInitialized_14" + input: "report_uninitialized_variables/IsVariableInitialized_15" + input: "report_uninitialized_variables/IsVariableInitialized_16" + input: "report_uninitialized_variables/IsVariableInitialized_17" + input: "report_uninitialized_variables/IsVariableInitialized_18" + input: "report_uninitialized_variables/IsVariableInitialized_19" + input: "report_uninitialized_variables/IsVariableInitialized_20" + input: "report_uninitialized_variables/IsVariableInitialized_21" + input: "report_uninitialized_variables/IsVariableInitialized_22" + input: "report_uninitialized_variables/IsVariableInitialized_23" + input: "report_uninitialized_variables/IsVariableInitialized_24" + input: "report_uninitialized_variables/IsVariableInitialized_25" + input: 
"report_uninitialized_variables/IsVariableInitialized_26" + input: "report_uninitialized_variables/IsVariableInitialized_27" + input: "report_uninitialized_variables/IsVariableInitialized_28" + input: "report_uninitialized_variables/IsVariableInitialized_29" + input: "report_uninitialized_variables/IsVariableInitialized_30" + input: "report_uninitialized_variables/IsVariableInitialized_31" + input: "report_uninitialized_variables/IsVariableInitialized_32" + input: "report_uninitialized_variables/IsVariableInitialized_33" + input: "report_uninitialized_variables/IsVariableInitialized_34" + input: "report_uninitialized_variables/IsVariableInitialized_35" + input: "report_uninitialized_variables/IsVariableInitialized_36" + input: "report_uninitialized_variables/IsVariableInitialized_37" + input: "report_uninitialized_variables/IsVariableInitialized_38" + input: "report_uninitialized_variables/IsVariableInitialized_39" + input: "report_uninitialized_variables/IsVariableInitialized_40" + input: "report_uninitialized_variables/IsVariableInitialized_41" + input: "report_uninitialized_variables/IsVariableInitialized_42" + input: "report_uninitialized_variables/IsVariableInitialized_43" + input: "report_uninitialized_variables/IsVariableInitialized_44" + input: "report_uninitialized_variables/IsVariableInitialized_45" + input: "report_uninitialized_variables/IsVariableInitialized_46" + input: "report_uninitialized_variables/IsVariableInitialized_47" + input: "report_uninitialized_variables/IsVariableInitialized_48" + input: "report_uninitialized_variables/IsVariableInitialized_49" + input: "report_uninitialized_variables/IsVariableInitialized_50" + input: "report_uninitialized_variables/IsVariableInitialized_51" + input: "report_uninitialized_variables/IsVariableInitialized_52" + input: "report_uninitialized_variables/IsVariableInitialized_53" + input: "report_uninitialized_variables/IsVariableInitialized_54" + input: "report_uninitialized_variables/IsVariableInitialized_55" + input: "report_uninitialized_variables/IsVariableInitialized_56" + input: "report_uninitialized_variables/IsVariableInitialized_57" + input: "report_uninitialized_variables/IsVariableInitialized_58" + input: "report_uninitialized_variables/IsVariableInitialized_59" + input: "report_uninitialized_variables/IsVariableInitialized_60" + input: "report_uninitialized_variables/IsVariableInitialized_61" + input: "report_uninitialized_variables/IsVariableInitialized_62" + input: "report_uninitialized_variables/IsVariableInitialized_63" + input: "report_uninitialized_variables/IsVariableInitialized_64" + input: "report_uninitialized_variables/IsVariableInitialized_65" + input: "report_uninitialized_variables/IsVariableInitialized_66" + input: "report_uninitialized_variables/IsVariableInitialized_67" + input: "report_uninitialized_variables/IsVariableInitialized_68" + input: "report_uninitialized_variables/IsVariableInitialized_69" + input: "report_uninitialized_variables/IsVariableInitialized_70" + input: "report_uninitialized_variables/IsVariableInitialized_71" + input: "report_uninitialized_variables/IsVariableInitialized_72" + input: "report_uninitialized_variables/IsVariableInitialized_73" + input: "report_uninitialized_variables/IsVariableInitialized_74" + input: "report_uninitialized_variables/IsVariableInitialized_75" + input: "report_uninitialized_variables/IsVariableInitialized_76" + input: "report_uninitialized_variables/IsVariableInitialized_77" + input: "report_uninitialized_variables/IsVariableInitialized_78" + input: 
"report_uninitialized_variables/IsVariableInitialized_79" + input: "report_uninitialized_variables/IsVariableInitialized_80" + input: "report_uninitialized_variables/IsVariableInitialized_81" + input: "report_uninitialized_variables/IsVariableInitialized_82" + input: "report_uninitialized_variables/IsVariableInitialized_83" + input: "report_uninitialized_variables/IsVariableInitialized_84" + input: "report_uninitialized_variables/IsVariableInitialized_85" + input: "report_uninitialized_variables/IsVariableInitialized_86" + input: "report_uninitialized_variables/IsVariableInitialized_87" + input: "report_uninitialized_variables/IsVariableInitialized_88" + input: "report_uninitialized_variables/IsVariableInitialized_89" + input: "report_uninitialized_variables/IsVariableInitialized_90" + input: "report_uninitialized_variables/IsVariableInitialized_91" + input: "report_uninitialized_variables/IsVariableInitialized_92" + input: "report_uninitialized_variables/IsVariableInitialized_93" + input: "report_uninitialized_variables/IsVariableInitialized_94" + input: "report_uninitialized_variables/IsVariableInitialized_95" + input: "report_uninitialized_variables/IsVariableInitialized_96" + input: "report_uninitialized_variables/IsVariableInitialized_97" + input: "report_uninitialized_variables/IsVariableInitialized_98" + input: "report_uninitialized_variables/IsVariableInitialized_99" + input: "report_uninitialized_variables/IsVariableInitialized_100" + input: "report_uninitialized_variables/IsVariableInitialized_101" + input: "report_uninitialized_variables/IsVariableInitialized_102" + input: "report_uninitialized_variables/IsVariableInitialized_103" + input: "report_uninitialized_variables/IsVariableInitialized_104" + input: "report_uninitialized_variables/IsVariableInitialized_105" + input: "report_uninitialized_variables/IsVariableInitialized_106" + input: "report_uninitialized_variables/IsVariableInitialized_107" + input: "report_uninitialized_variables/IsVariableInitialized_108" + input: "report_uninitialized_variables/IsVariableInitialized_109" + input: "report_uninitialized_variables/IsVariableInitialized_110" + input: "report_uninitialized_variables/IsVariableInitialized_111" + input: "report_uninitialized_variables/IsVariableInitialized_112" + input: "report_uninitialized_variables/IsVariableInitialized_113" + input: "report_uninitialized_variables/IsVariableInitialized_114" + input: "report_uninitialized_variables/IsVariableInitialized_115" + input: "report_uninitialized_variables/IsVariableInitialized_116" + input: "report_uninitialized_variables/IsVariableInitialized_117" + input: "report_uninitialized_variables/IsVariableInitialized_118" + input: "report_uninitialized_variables/IsVariableInitialized_119" + input: "report_uninitialized_variables/IsVariableInitialized_120" + input: "report_uninitialized_variables/IsVariableInitialized_121" + input: "report_uninitialized_variables/IsVariableInitialized_122" + input: "report_uninitialized_variables/IsVariableInitialized_123" + input: "report_uninitialized_variables/IsVariableInitialized_124" + input: "report_uninitialized_variables/IsVariableInitialized_125" + input: "report_uninitialized_variables/IsVariableInitialized_126" + input: "report_uninitialized_variables/IsVariableInitialized_127" + input: "report_uninitialized_variables/IsVariableInitialized_128" + input: "report_uninitialized_variables/IsVariableInitialized_129" + input: "report_uninitialized_variables/IsVariableInitialized_130" + input: 
"report_uninitialized_variables/IsVariableInitialized_131" + input: "report_uninitialized_variables/IsVariableInitialized_132" + input: "report_uninitialized_variables/IsVariableInitialized_133" + input: "report_uninitialized_variables/IsVariableInitialized_134" + input: "report_uninitialized_variables/IsVariableInitialized_135" + input: "report_uninitialized_variables/IsVariableInitialized_136" + input: "report_uninitialized_variables/IsVariableInitialized_137" + input: "report_uninitialized_variables/IsVariableInitialized_138" + input: "report_uninitialized_variables/IsVariableInitialized_139" + input: "report_uninitialized_variables/IsVariableInitialized_140" + input: "report_uninitialized_variables/IsVariableInitialized_141" + input: "report_uninitialized_variables/IsVariableInitialized_142" + input: "report_uninitialized_variables/IsVariableInitialized_143" + input: "report_uninitialized_variables/IsVariableInitialized_144" + input: "report_uninitialized_variables/IsVariableInitialized_145" + input: "report_uninitialized_variables/IsVariableInitialized_146" + input: "report_uninitialized_variables/IsVariableInitialized_147" + input: "report_uninitialized_variables/IsVariableInitialized_148" + input: "report_uninitialized_variables/IsVariableInitialized_149" + input: "report_uninitialized_variables/IsVariableInitialized_150" + input: "report_uninitialized_variables/IsVariableInitialized_151" + input: "report_uninitialized_variables/IsVariableInitialized_152" + input: "report_uninitialized_variables/IsVariableInitialized_153" + input: "report_uninitialized_variables/IsVariableInitialized_154" + input: "report_uninitialized_variables/IsVariableInitialized_155" + input: "report_uninitialized_variables/IsVariableInitialized_156" + input: "report_uninitialized_variables/IsVariableInitialized_157" + input: "report_uninitialized_variables/IsVariableInitialized_158" + input: "report_uninitialized_variables/IsVariableInitialized_159" + input: "report_uninitialized_variables/IsVariableInitialized_160" + input: "report_uninitialized_variables/IsVariableInitialized_161" + input: "report_uninitialized_variables/IsVariableInitialized_162" + input: "report_uninitialized_variables/IsVariableInitialized_163" + input: "report_uninitialized_variables/IsVariableInitialized_164" + input: "report_uninitialized_variables/IsVariableInitialized_165" + input: "report_uninitialized_variables/IsVariableInitialized_166" + input: "report_uninitialized_variables/IsVariableInitialized_167" + input: "report_uninitialized_variables/IsVariableInitialized_168" + input: "report_uninitialized_variables/IsVariableInitialized_169" + input: "report_uninitialized_variables/IsVariableInitialized_170" + input: "report_uninitialized_variables/IsVariableInitialized_171" + input: "report_uninitialized_variables/IsVariableInitialized_172" + input: "report_uninitialized_variables/IsVariableInitialized_173" + input: "report_uninitialized_variables/IsVariableInitialized_174" + input: "report_uninitialized_variables/IsVariableInitialized_175" + input: "report_uninitialized_variables/IsVariableInitialized_176" + input: "report_uninitialized_variables/IsVariableInitialized_177" + input: "report_uninitialized_variables/IsVariableInitialized_178" + input: "report_uninitialized_variables/IsVariableInitialized_179" + input: "report_uninitialized_variables/IsVariableInitialized_180" + input: "report_uninitialized_variables/IsVariableInitialized_181" + input: "report_uninitialized_variables/IsVariableInitialized_182" + input: 
"report_uninitialized_variables/IsVariableInitialized_183" + input: "report_uninitialized_variables/IsVariableInitialized_184" + input: "report_uninitialized_variables/IsVariableInitialized_185" + input: "report_uninitialized_variables/IsVariableInitialized_186" + input: "report_uninitialized_variables/IsVariableInitialized_187" + input: "report_uninitialized_variables/IsVariableInitialized_188" + input: "report_uninitialized_variables/IsVariableInitialized_189" + input: "report_uninitialized_variables/IsVariableInitialized_190" + input: "report_uninitialized_variables/IsVariableInitialized_191" + input: "report_uninitialized_variables/IsVariableInitialized_192" + input: "report_uninitialized_variables/IsVariableInitialized_193" + input: "report_uninitialized_variables/IsVariableInitialized_194" + input: "report_uninitialized_variables/IsVariableInitialized_195" + input: "report_uninitialized_variables/IsVariableInitialized_196" + input: "report_uninitialized_variables/IsVariableInitialized_197" + input: "report_uninitialized_variables/IsVariableInitialized_198" + input: "report_uninitialized_variables/IsVariableInitialized_199" + input: "report_uninitialized_variables/IsVariableInitialized_200" + input: "report_uninitialized_variables/IsVariableInitialized_201" + input: "report_uninitialized_variables/IsVariableInitialized_202" + input: "report_uninitialized_variables/IsVariableInitialized_203" + input: "report_uninitialized_variables/IsVariableInitialized_204" + input: "report_uninitialized_variables/IsVariableInitialized_205" + input: "report_uninitialized_variables/IsVariableInitialized_206" + input: "report_uninitialized_variables/IsVariableInitialized_207" + input: "report_uninitialized_variables/IsVariableInitialized_208" + input: "report_uninitialized_variables/IsVariableInitialized_209" + input: "report_uninitialized_variables/IsVariableInitialized_210" + input: "report_uninitialized_variables/IsVariableInitialized_211" + input: "report_uninitialized_variables/IsVariableInitialized_212" + input: "report_uninitialized_variables/IsVariableInitialized_213" + input: "report_uninitialized_variables/IsVariableInitialized_214" + input: "report_uninitialized_variables/IsVariableInitialized_215" + input: "report_uninitialized_variables/IsVariableInitialized_216" + input: "report_uninitialized_variables/IsVariableInitialized_217" + input: "report_uninitialized_variables/IsVariableInitialized_218" + input: "report_uninitialized_variables/IsVariableInitialized_219" + input: "report_uninitialized_variables/IsVariableInitialized_220" + input: "report_uninitialized_variables/IsVariableInitialized_221" + input: "report_uninitialized_variables/IsVariableInitialized_222" + input: "report_uninitialized_variables/IsVariableInitialized_223" + input: "report_uninitialized_variables/IsVariableInitialized_224" + input: "report_uninitialized_variables/IsVariableInitialized_225" + input: "report_uninitialized_variables/IsVariableInitialized_226" + input: "report_uninitialized_variables/IsVariableInitialized_227" + input: "report_uninitialized_variables/IsVariableInitialized_228" + input: "report_uninitialized_variables/IsVariableInitialized_229" + input: "report_uninitialized_variables/IsVariableInitialized_230" + input: "report_uninitialized_variables/IsVariableInitialized_231" + input: "report_uninitialized_variables/IsVariableInitialized_232" + input: "report_uninitialized_variables/IsVariableInitialized_233" + input: "report_uninitialized_variables/IsVariableInitialized_234" + input: 
"report_uninitialized_variables/IsVariableInitialized_235" + input: "report_uninitialized_variables/IsVariableInitialized_236" + input: "report_uninitialized_variables/IsVariableInitialized_237" + input: "report_uninitialized_variables/IsVariableInitialized_238" + input: "report_uninitialized_variables/IsVariableInitialized_239" + input: "report_uninitialized_variables/IsVariableInitialized_240" + input: "report_uninitialized_variables/IsVariableInitialized_241" + input: "report_uninitialized_variables/IsVariableInitialized_242" + input: "report_uninitialized_variables/IsVariableInitialized_243" + input: "report_uninitialized_variables/IsVariableInitialized_244" + input: "report_uninitialized_variables/IsVariableInitialized_245" + input: "report_uninitialized_variables/IsVariableInitialized_246" + input: "report_uninitialized_variables/IsVariableInitialized_247" + input: "report_uninitialized_variables/IsVariableInitialized_248" + input: "report_uninitialized_variables/IsVariableInitialized_249" + input: "report_uninitialized_variables/IsVariableInitialized_250" + input: "report_uninitialized_variables/IsVariableInitialized_251" + input: "report_uninitialized_variables/IsVariableInitialized_252" + input: "report_uninitialized_variables/IsVariableInitialized_253" + input: "report_uninitialized_variables/IsVariableInitialized_254" + input: "report_uninitialized_variables/IsVariableInitialized_255" + input: "report_uninitialized_variables/IsVariableInitialized_256" + input: "report_uninitialized_variables/IsVariableInitialized_257" + input: "report_uninitialized_variables/IsVariableInitialized_258" + input: "report_uninitialized_variables/IsVariableInitialized_259" + input: "report_uninitialized_variables/IsVariableInitialized_260" + input: "report_uninitialized_variables/IsVariableInitialized_261" + input: "report_uninitialized_variables/IsVariableInitialized_262" + input: "report_uninitialized_variables/IsVariableInitialized_263" + input: "report_uninitialized_variables/IsVariableInitialized_264" + input: "report_uninitialized_variables/IsVariableInitialized_265" + input: "report_uninitialized_variables/IsVariableInitialized_266" + input: "report_uninitialized_variables/IsVariableInitialized_267" + input: "report_uninitialized_variables/IsVariableInitialized_268" + input: "report_uninitialized_variables/IsVariableInitialized_269" + input: "report_uninitialized_variables/IsVariableInitialized_270" + input: "report_uninitialized_variables/IsVariableInitialized_271" + input: "report_uninitialized_variables/IsVariableInitialized_272" + input: "report_uninitialized_variables/IsVariableInitialized_273" + input: "report_uninitialized_variables/IsVariableInitialized_274" + input: "report_uninitialized_variables/IsVariableInitialized_275" + input: "report_uninitialized_variables/IsVariableInitialized_276" + input: "report_uninitialized_variables/IsVariableInitialized_277" + input: "report_uninitialized_variables/IsVariableInitialized_278" + input: "report_uninitialized_variables/IsVariableInitialized_279" + input: "report_uninitialized_variables/IsVariableInitialized_280" + input: "report_uninitialized_variables/IsVariableInitialized_281" + input: "report_uninitialized_variables/IsVariableInitialized_282" + input: "report_uninitialized_variables/IsVariableInitialized_283" + input: "report_uninitialized_variables/IsVariableInitialized_284" + input: "report_uninitialized_variables/IsVariableInitialized_285" + input: "report_uninitialized_variables/IsVariableInitialized_286" + input: 
"report_uninitialized_variables/IsVariableInitialized_287" + input: "report_uninitialized_variables/IsVariableInitialized_288" + input: "report_uninitialized_variables/IsVariableInitialized_289" + input: "report_uninitialized_variables/IsVariableInitialized_290" + input: "report_uninitialized_variables/IsVariableInitialized_291" + input: "report_uninitialized_variables/IsVariableInitialized_292" + input: "report_uninitialized_variables/IsVariableInitialized_293" + input: "report_uninitialized_variables/IsVariableInitialized_294" + input: "report_uninitialized_variables/IsVariableInitialized_295" + input: "report_uninitialized_variables/IsVariableInitialized_296" + input: "report_uninitialized_variables/IsVariableInitialized_297" + input: "report_uninitialized_variables/IsVariableInitialized_298" + input: "report_uninitialized_variables/IsVariableInitialized_299" + input: "report_uninitialized_variables/IsVariableInitialized_300" + input: "report_uninitialized_variables/IsVariableInitialized_301" + input: "report_uninitialized_variables/IsVariableInitialized_302" + input: "report_uninitialized_variables/IsVariableInitialized_303" + input: "report_uninitialized_variables/IsVariableInitialized_304" + input: "report_uninitialized_variables/IsVariableInitialized_305" + input: "report_uninitialized_variables/IsVariableInitialized_306" + input: "report_uninitialized_variables/IsVariableInitialized_307" + input: "report_uninitialized_variables/IsVariableInitialized_308" + input: "report_uninitialized_variables/IsVariableInitialized_309" + input: "report_uninitialized_variables/IsVariableInitialized_310" + input: "report_uninitialized_variables/IsVariableInitialized_311" + input: "report_uninitialized_variables/IsVariableInitialized_312" + input: "report_uninitialized_variables/IsVariableInitialized_313" + input: "report_uninitialized_variables/IsVariableInitialized_314" + input: "report_uninitialized_variables/IsVariableInitialized_315" + input: "report_uninitialized_variables/IsVariableInitialized_316" + input: "report_uninitialized_variables/IsVariableInitialized_317" + input: "report_uninitialized_variables/IsVariableInitialized_318" + input: "report_uninitialized_variables/IsVariableInitialized_319" + input: "report_uninitialized_variables/IsVariableInitialized_320" + input: "report_uninitialized_variables/IsVariableInitialized_321" + input: "report_uninitialized_variables/IsVariableInitialized_322" + input: "report_uninitialized_variables/IsVariableInitialized_323" + input: "report_uninitialized_variables/IsVariableInitialized_324" + input: "report_uninitialized_variables/IsVariableInitialized_325" + input: "report_uninitialized_variables/IsVariableInitialized_326" + input: "report_uninitialized_variables/IsVariableInitialized_327" + input: "report_uninitialized_variables/IsVariableInitialized_328" + input: "report_uninitialized_variables/IsVariableInitialized_329" + input: "report_uninitialized_variables/IsVariableInitialized_330" + input: "report_uninitialized_variables/IsVariableInitialized_331" + input: "report_uninitialized_variables/IsVariableInitialized_332" + input: "report_uninitialized_variables/IsVariableInitialized_333" + input: "report_uninitialized_variables/IsVariableInitialized_334" + input: "report_uninitialized_variables/IsVariableInitialized_335" + input: "report_uninitialized_variables/IsVariableInitialized_336" + input: "report_uninitialized_variables/IsVariableInitialized_337" + input: "report_uninitialized_variables/IsVariableInitialized_338" + input: 
"report_uninitialized_variables/IsVariableInitialized_339" + input: "report_uninitialized_variables/IsVariableInitialized_340" + input: "report_uninitialized_variables/IsVariableInitialized_341" + input: "report_uninitialized_variables/IsVariableInitialized_342" + input: "report_uninitialized_variables/IsVariableInitialized_343" + input: "report_uninitialized_variables/IsVariableInitialized_344" + input: "report_uninitialized_variables/IsVariableInitialized_345" + input: "report_uninitialized_variables/IsVariableInitialized_346" + input: "report_uninitialized_variables/IsVariableInitialized_347" + input: "report_uninitialized_variables/IsVariableInitialized_348" + input: "report_uninitialized_variables/IsVariableInitialized_349" + input: "report_uninitialized_variables/IsVariableInitialized_350" + input: "report_uninitialized_variables/IsVariableInitialized_351" + input: "report_uninitialized_variables/IsVariableInitialized_352" + input: "report_uninitialized_variables/IsVariableInitialized_353" + input: "report_uninitialized_variables/IsVariableInitialized_354" + input: "report_uninitialized_variables/IsVariableInitialized_355" + input: "report_uninitialized_variables/IsVariableInitialized_356" + input: "report_uninitialized_variables/IsVariableInitialized_357" + input: "report_uninitialized_variables/IsVariableInitialized_358" + input: "report_uninitialized_variables/IsVariableInitialized_359" + input: "report_uninitialized_variables/IsVariableInitialized_360" + input: "report_uninitialized_variables/IsVariableInitialized_361" + input: "report_uninitialized_variables/IsVariableInitialized_362" + input: "report_uninitialized_variables/IsVariableInitialized_363" + input: "report_uninitialized_variables/IsVariableInitialized_364" + input: "report_uninitialized_variables/IsVariableInitialized_365" + input: "report_uninitialized_variables/IsVariableInitialized_366" + input: "report_uninitialized_variables/IsVariableInitialized_367" + input: "report_uninitialized_variables/IsVariableInitialized_368" + input: "report_uninitialized_variables/IsVariableInitialized_369" + input: "report_uninitialized_variables/IsVariableInitialized_370" + input: "report_uninitialized_variables/IsVariableInitialized_371" + input: "report_uninitialized_variables/IsVariableInitialized_372" + input: "report_uninitialized_variables/IsVariableInitialized_373" + input: "report_uninitialized_variables/IsVariableInitialized_374" + input: "report_uninitialized_variables/IsVariableInitialized_375" + input: "report_uninitialized_variables/IsVariableInitialized_376" + input: "report_uninitialized_variables/IsVariableInitialized_377" + input: "report_uninitialized_variables/IsVariableInitialized_378" + input: "report_uninitialized_variables/IsVariableInitialized_379" + input: "report_uninitialized_variables/IsVariableInitialized_380" + input: "report_uninitialized_variables/IsVariableInitialized_381" + input: "report_uninitialized_variables/IsVariableInitialized_382" + input: "report_uninitialized_variables/IsVariableInitialized_383" + input: "report_uninitialized_variables/IsVariableInitialized_384" + input: "report_uninitialized_variables/IsVariableInitialized_385" + input: "report_uninitialized_variables/IsVariableInitialized_386" + input: "report_uninitialized_variables/IsVariableInitialized_387" + input: "report_uninitialized_variables/IsVariableInitialized_388" + input: "report_uninitialized_variables/IsVariableInitialized_389" + input: "report_uninitialized_variables/IsVariableInitialized_390" + input: 
"report_uninitialized_variables/IsVariableInitialized_391" + input: "report_uninitialized_variables/IsVariableInitialized_392" + input: "report_uninitialized_variables/IsVariableInitialized_393" + input: "report_uninitialized_variables/IsVariableInitialized_394" + input: "report_uninitialized_variables/IsVariableInitialized_395" + input: "report_uninitialized_variables/IsVariableInitialized_396" + input: "report_uninitialized_variables/IsVariableInitialized_397" + input: "report_uninitialized_variables/IsVariableInitialized_398" + input: "report_uninitialized_variables/IsVariableInitialized_399" + input: "report_uninitialized_variables/IsVariableInitialized_400" + input: "report_uninitialized_variables/IsVariableInitialized_401" + input: "report_uninitialized_variables/IsVariableInitialized_402" + input: "report_uninitialized_variables/IsVariableInitialized_403" + input: "report_uninitialized_variables/IsVariableInitialized_404" + input: "report_uninitialized_variables/IsVariableInitialized_405" + input: "report_uninitialized_variables/IsVariableInitialized_406" + input: "report_uninitialized_variables/IsVariableInitialized_407" + input: "report_uninitialized_variables/IsVariableInitialized_408" + input: "report_uninitialized_variables/IsVariableInitialized_409" + input: "report_uninitialized_variables/IsVariableInitialized_410" + input: "report_uninitialized_variables/IsVariableInitialized_411" + input: "report_uninitialized_variables/IsVariableInitialized_412" + input: "report_uninitialized_variables/IsVariableInitialized_413" + input: "report_uninitialized_variables/IsVariableInitialized_414" + input: "report_uninitialized_variables/IsVariableInitialized_415" + input: "report_uninitialized_variables/IsVariableInitialized_416" + input: "report_uninitialized_variables/IsVariableInitialized_417" + input: "report_uninitialized_variables/IsVariableInitialized_418" + input: "report_uninitialized_variables/IsVariableInitialized_419" + input: "report_uninitialized_variables/IsVariableInitialized_420" + input: "report_uninitialized_variables/IsVariableInitialized_421" + input: "report_uninitialized_variables/IsVariableInitialized_422" + input: "report_uninitialized_variables/IsVariableInitialized_423" + input: "report_uninitialized_variables/IsVariableInitialized_424" + input: "report_uninitialized_variables/IsVariableInitialized_425" + input: "report_uninitialized_variables/IsVariableInitialized_426" + input: "report_uninitialized_variables/IsVariableInitialized_427" + input: "report_uninitialized_variables/IsVariableInitialized_428" + input: "report_uninitialized_variables/IsVariableInitialized_429" + input: "report_uninitialized_variables/IsVariableInitialized_430" + input: "report_uninitialized_variables/IsVariableInitialized_431" + input: "report_uninitialized_variables/IsVariableInitialized_432" + input: "report_uninitialized_variables/IsVariableInitialized_433" + input: "report_uninitialized_variables/IsVariableInitialized_434" + input: "report_uninitialized_variables/IsVariableInitialized_435" + input: "report_uninitialized_variables/IsVariableInitialized_436" + input: "report_uninitialized_variables/IsVariableInitialized_437" + input: "report_uninitialized_variables/IsVariableInitialized_438" + input: "report_uninitialized_variables/IsVariableInitialized_439" + input: "report_uninitialized_variables/IsVariableInitialized_440" + input: "report_uninitialized_variables/IsVariableInitialized_441" + input: "report_uninitialized_variables/IsVariableInitialized_442" + input: 
"report_uninitialized_variables/IsVariableInitialized_443" + input: "report_uninitialized_variables/IsVariableInitialized_444" + input: "report_uninitialized_variables/IsVariableInitialized_445" + input: "report_uninitialized_variables/IsVariableInitialized_446" + input: "report_uninitialized_variables/IsVariableInitialized_447" + input: "report_uninitialized_variables/IsVariableInitialized_448" + input: "report_uninitialized_variables/IsVariableInitialized_449" + input: "report_uninitialized_variables/IsVariableInitialized_450" + input: "report_uninitialized_variables/IsVariableInitialized_451" + input: "report_uninitialized_variables/IsVariableInitialized_452" + input: "report_uninitialized_variables/IsVariableInitialized_453" + input: "report_uninitialized_variables/IsVariableInitialized_454" + input: "report_uninitialized_variables/IsVariableInitialized_455" + input: "report_uninitialized_variables/IsVariableInitialized_456" + input: "report_uninitialized_variables/IsVariableInitialized_457" + input: "report_uninitialized_variables/IsVariableInitialized_458" + input: "report_uninitialized_variables/IsVariableInitialized_459" + input: "report_uninitialized_variables/IsVariableInitialized_460" + input: "report_uninitialized_variables/IsVariableInitialized_461" + input: "report_uninitialized_variables/IsVariableInitialized_462" + input: "report_uninitialized_variables/IsVariableInitialized_463" + input: "report_uninitialized_variables/IsVariableInitialized_464" + input: "report_uninitialized_variables/IsVariableInitialized_465" + input: "report_uninitialized_variables/IsVariableInitialized_466" + input: "report_uninitialized_variables/IsVariableInitialized_467" + input: "report_uninitialized_variables/IsVariableInitialized_468" + input: "report_uninitialized_variables/IsVariableInitialized_469" + input: "report_uninitialized_variables/IsVariableInitialized_470" + input: "report_uninitialized_variables/IsVariableInitialized_471" + input: "report_uninitialized_variables/IsVariableInitialized_472" + input: "report_uninitialized_variables/IsVariableInitialized_473" + input: "report_uninitialized_variables/IsVariableInitialized_474" + input: "report_uninitialized_variables/IsVariableInitialized_475" + input: "report_uninitialized_variables/IsVariableInitialized_476" + input: "report_uninitialized_variables/IsVariableInitialized_477" + input: "report_uninitialized_variables/IsVariableInitialized_478" + input: "report_uninitialized_variables/IsVariableInitialized_479" + input: "report_uninitialized_variables/IsVariableInitialized_480" + input: "report_uninitialized_variables/IsVariableInitialized_481" + input: "report_uninitialized_variables/IsVariableInitialized_482" + input: "report_uninitialized_variables/IsVariableInitialized_483" + input: "report_uninitialized_variables/IsVariableInitialized_484" + input: "report_uninitialized_variables/IsVariableInitialized_485" + input: "report_uninitialized_variables/IsVariableInitialized_486" + input: "report_uninitialized_variables/IsVariableInitialized_487" + input: "report_uninitialized_variables/IsVariableInitialized_488" + input: "report_uninitialized_variables/IsVariableInitialized_489" + input: "report_uninitialized_variables/IsVariableInitialized_490" + input: "report_uninitialized_variables/IsVariableInitialized_491" + input: "report_uninitialized_variables/IsVariableInitialized_492" + input: "report_uninitialized_variables/IsVariableInitialized_493" + input: "report_uninitialized_variables/IsVariableInitialized_494" + input: 
"report_uninitialized_variables/IsVariableInitialized_495" + input: "report_uninitialized_variables/IsVariableInitialized_496" + input: "report_uninitialized_variables/IsVariableInitialized_497" + input: "report_uninitialized_variables/IsVariableInitialized_498" + input: "report_uninitialized_variables/IsVariableInitialized_499" + input: "report_uninitialized_variables/IsVariableInitialized_500" + input: "report_uninitialized_variables/IsVariableInitialized_501" + input: "report_uninitialized_variables/IsVariableInitialized_502" + input: "report_uninitialized_variables/IsVariableInitialized_503" + input: "report_uninitialized_variables/IsVariableInitialized_504" + input: "report_uninitialized_variables/IsVariableInitialized_505" + input: "report_uninitialized_variables/IsVariableInitialized_506" + input: "report_uninitialized_variables/IsVariableInitialized_507" + input: "report_uninitialized_variables/IsVariableInitialized_508" + input: "report_uninitialized_variables/IsVariableInitialized_509" + input: "report_uninitialized_variables/IsVariableInitialized_510" + input: "report_uninitialized_variables/IsVariableInitialized_511" + input: "report_uninitialized_variables/IsVariableInitialized_512" + input: "report_uninitialized_variables/IsVariableInitialized_513" + input: "report_uninitialized_variables/IsVariableInitialized_514" + input: "report_uninitialized_variables/IsVariableInitialized_515" + input: "report_uninitialized_variables/IsVariableInitialized_516" + input: "report_uninitialized_variables/IsVariableInitialized_517" + input: "report_uninitialized_variables/IsVariableInitialized_518" + input: "report_uninitialized_variables/IsVariableInitialized_519" + input: "report_uninitialized_variables/IsVariableInitialized_520" + input: "report_uninitialized_variables/IsVariableInitialized_521" + input: "report_uninitialized_variables/IsVariableInitialized_522" + input: "report_uninitialized_variables/IsVariableInitialized_523" + input: "report_uninitialized_variables/IsVariableInitialized_524" + input: "report_uninitialized_variables/IsVariableInitialized_525" + input: "report_uninitialized_variables/IsVariableInitialized_526" + input: "report_uninitialized_variables/IsVariableInitialized_527" + input: "report_uninitialized_variables/IsVariableInitialized_528" + input: "report_uninitialized_variables/IsVariableInitialized_529" + input: "report_uninitialized_variables/IsVariableInitialized_530" + input: "report_uninitialized_variables/IsVariableInitialized_531" + input: "report_uninitialized_variables/IsVariableInitialized_532" + input: "report_uninitialized_variables/IsVariableInitialized_533" + input: "report_uninitialized_variables/IsVariableInitialized_534" + input: "report_uninitialized_variables/IsVariableInitialized_535" + input: "report_uninitialized_variables/IsVariableInitialized_536" + input: "report_uninitialized_variables/IsVariableInitialized_537" + input: "report_uninitialized_variables/IsVariableInitialized_538" + input: "report_uninitialized_variables/IsVariableInitialized_539" + input: "report_uninitialized_variables/IsVariableInitialized_540" + input: "report_uninitialized_variables/IsVariableInitialized_541" + input: "report_uninitialized_variables/IsVariableInitialized_542" + input: "report_uninitialized_variables/IsVariableInitialized_543" + input: "report_uninitialized_variables/IsVariableInitialized_544" + input: "report_uninitialized_variables/IsVariableInitialized_545" + input: "report_uninitialized_variables/IsVariableInitialized_546" + input: 
"report_uninitialized_variables/IsVariableInitialized_547" + input: "report_uninitialized_variables/IsVariableInitialized_548" + input: "report_uninitialized_variables/IsVariableInitialized_549" + input: "report_uninitialized_variables/IsVariableInitialized_550" + input: "report_uninitialized_variables/IsVariableInitialized_551" + input: "report_uninitialized_variables/IsVariableInitialized_552" + input: "report_uninitialized_variables/IsVariableInitialized_553" + input: "report_uninitialized_variables/IsVariableInitialized_554" + input: "report_uninitialized_variables/IsVariableInitialized_555" + input: "report_uninitialized_variables/IsVariableInitialized_556" + input: "report_uninitialized_variables/IsVariableInitialized_557" + input: "report_uninitialized_variables/IsVariableInitialized_558" + input: "report_uninitialized_variables/IsVariableInitialized_559" + input: "report_uninitialized_variables/IsVariableInitialized_560" + input: "report_uninitialized_variables/IsVariableInitialized_561" + input: "report_uninitialized_variables/IsVariableInitialized_562" + input: "report_uninitialized_variables/IsVariableInitialized_563" + input: "report_uninitialized_variables/IsVariableInitialized_564" + input: "report_uninitialized_variables/IsVariableInitialized_565" + input: "report_uninitialized_variables/IsVariableInitialized_566" + input: "report_uninitialized_variables/IsVariableInitialized_567" + input: "report_uninitialized_variables/IsVariableInitialized_568" + input: "report_uninitialized_variables/IsVariableInitialized_569" + input: "report_uninitialized_variables/IsVariableInitialized_570" + input: "report_uninitialized_variables/IsVariableInitialized_571" + input: "report_uninitialized_variables/IsVariableInitialized_572" + input: "report_uninitialized_variables/IsVariableInitialized_573" + input: "report_uninitialized_variables/IsVariableInitialized_574" + input: "report_uninitialized_variables/IsVariableInitialized_575" + input: "report_uninitialized_variables/IsVariableInitialized_576" + input: "report_uninitialized_variables/IsVariableInitialized_577" + input: "report_uninitialized_variables/IsVariableInitialized_578" + input: "report_uninitialized_variables/IsVariableInitialized_579" + input: "report_uninitialized_variables/IsVariableInitialized_580" + input: "report_uninitialized_variables/IsVariableInitialized_581" + input: "report_uninitialized_variables/IsVariableInitialized_582" + input: "report_uninitialized_variables/IsVariableInitialized_583" + input: "report_uninitialized_variables/IsVariableInitialized_584" + input: "report_uninitialized_variables/IsVariableInitialized_585" + input: "report_uninitialized_variables/IsVariableInitialized_586" + input: "report_uninitialized_variables/IsVariableInitialized_587" + input: "report_uninitialized_variables/IsVariableInitialized_588" + input: "report_uninitialized_variables/IsVariableInitialized_589" + input: "report_uninitialized_variables/IsVariableInitialized_590" + input: "report_uninitialized_variables/IsVariableInitialized_591" + input: "report_uninitialized_variables/IsVariableInitialized_592" + input: "report_uninitialized_variables/IsVariableInitialized_593" + input: "report_uninitialized_variables/IsVariableInitialized_594" + input: "report_uninitialized_variables/IsVariableInitialized_595" + input: "report_uninitialized_variables/IsVariableInitialized_596" + input: "report_uninitialized_variables/IsVariableInitialized_597" + input: "report_uninitialized_variables/IsVariableInitialized_598" + input: 
"report_uninitialized_variables/IsVariableInitialized_599" + input: "report_uninitialized_variables/IsVariableInitialized_600" + input: "report_uninitialized_variables/IsVariableInitialized_601" + input: "report_uninitialized_variables/IsVariableInitialized_602" + device: "/device:CPU:0" + attr { + key: "N" + value { + i: 604 + } + } + attr { + key: "T" + value { + type: DT_BOOL + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 604 + } + } + } + } + } + attr { + key: "axis" + value { + i: 0 + } + } +} +node { + name: "report_uninitialized_variables/LogicalNot" + op: "LogicalNot" + input: "report_uninitialized_variables/stack" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 604 + } + } + } + } + } +} +node { + name: "report_uninitialized_variables/Const" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 604 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 604 + } + } + string_val: "global_step" + string_val: "bert/embeddings/word_embeddings" + string_val: "bert/embeddings/token_type_embeddings" + string_val: "bert/embeddings/position_embeddings" + string_val: "bert/embeddings/LayerNorm/beta" + string_val: "bert/embeddings/LayerNorm/gamma" + string_val: "bert/encoder/layer_0/attention/self/query/kernel" + string_val: "bert/encoder/layer_0/attention/self/query/bias" + string_val: "bert/encoder/layer_0/attention/self/key/kernel" + string_val: "bert/encoder/layer_0/attention/self/key/bias" + string_val: "bert/encoder/layer_0/attention/self/value/kernel" + string_val: "bert/encoder/layer_0/attention/self/value/bias" + string_val: "bert/encoder/layer_0/attention/output/dense/kernel" + string_val: "bert/encoder/layer_0/attention/output/dense/bias" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_0/intermediate/dense/kernel" + string_val: "bert/encoder/layer_0/intermediate/dense/bias" + string_val: "bert/encoder/layer_0/output/dense/kernel" + string_val: "bert/encoder/layer_0/output/dense/bias" + string_val: "bert/encoder/layer_0/output/LayerNorm/beta" + string_val: "bert/encoder/layer_0/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_1/attention/self/query/kernel" + string_val: "bert/encoder/layer_1/attention/self/query/bias" + string_val: "bert/encoder/layer_1/attention/self/key/kernel" + string_val: "bert/encoder/layer_1/attention/self/key/bias" + string_val: "bert/encoder/layer_1/attention/self/value/kernel" + string_val: "bert/encoder/layer_1/attention/self/value/bias" + string_val: "bert/encoder/layer_1/attention/output/dense/kernel" + string_val: "bert/encoder/layer_1/attention/output/dense/bias" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_1/intermediate/dense/kernel" + string_val: "bert/encoder/layer_1/intermediate/dense/bias" + string_val: "bert/encoder/layer_1/output/dense/kernel" + string_val: "bert/encoder/layer_1/output/dense/bias" + string_val: "bert/encoder/layer_1/output/LayerNorm/beta" + string_val: "bert/encoder/layer_1/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_2/attention/self/query/kernel" + string_val: 
"bert/encoder/layer_2/attention/self/query/bias" + string_val: "bert/encoder/layer_2/attention/self/key/kernel" + string_val: "bert/encoder/layer_2/attention/self/key/bias" + string_val: "bert/encoder/layer_2/attention/self/value/kernel" + string_val: "bert/encoder/layer_2/attention/self/value/bias" + string_val: "bert/encoder/layer_2/attention/output/dense/kernel" + string_val: "bert/encoder/layer_2/attention/output/dense/bias" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_2/intermediate/dense/kernel" + string_val: "bert/encoder/layer_2/intermediate/dense/bias" + string_val: "bert/encoder/layer_2/output/dense/kernel" + string_val: "bert/encoder/layer_2/output/dense/bias" + string_val: "bert/encoder/layer_2/output/LayerNorm/beta" + string_val: "bert/encoder/layer_2/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_3/attention/self/query/kernel" + string_val: "bert/encoder/layer_3/attention/self/query/bias" + string_val: "bert/encoder/layer_3/attention/self/key/kernel" + string_val: "bert/encoder/layer_3/attention/self/key/bias" + string_val: "bert/encoder/layer_3/attention/self/value/kernel" + string_val: "bert/encoder/layer_3/attention/self/value/bias" + string_val: "bert/encoder/layer_3/attention/output/dense/kernel" + string_val: "bert/encoder/layer_3/attention/output/dense/bias" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_3/intermediate/dense/kernel" + string_val: "bert/encoder/layer_3/intermediate/dense/bias" + string_val: "bert/encoder/layer_3/output/dense/kernel" + string_val: "bert/encoder/layer_3/output/dense/bias" + string_val: "bert/encoder/layer_3/output/LayerNorm/beta" + string_val: "bert/encoder/layer_3/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_4/attention/self/query/kernel" + string_val: "bert/encoder/layer_4/attention/self/query/bias" + string_val: "bert/encoder/layer_4/attention/self/key/kernel" + string_val: "bert/encoder/layer_4/attention/self/key/bias" + string_val: "bert/encoder/layer_4/attention/self/value/kernel" + string_val: "bert/encoder/layer_4/attention/self/value/bias" + string_val: "bert/encoder/layer_4/attention/output/dense/kernel" + string_val: "bert/encoder/layer_4/attention/output/dense/bias" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_4/intermediate/dense/kernel" + string_val: "bert/encoder/layer_4/intermediate/dense/bias" + string_val: "bert/encoder/layer_4/output/dense/kernel" + string_val: "bert/encoder/layer_4/output/dense/bias" + string_val: "bert/encoder/layer_4/output/LayerNorm/beta" + string_val: "bert/encoder/layer_4/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_5/attention/self/query/kernel" + string_val: "bert/encoder/layer_5/attention/self/query/bias" + string_val: "bert/encoder/layer_5/attention/self/key/kernel" + string_val: "bert/encoder/layer_5/attention/self/key/bias" + string_val: "bert/encoder/layer_5/attention/self/value/kernel" + string_val: "bert/encoder/layer_5/attention/self/value/bias" + string_val: "bert/encoder/layer_5/attention/output/dense/kernel" + string_val: "bert/encoder/layer_5/attention/output/dense/bias" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/beta" + string_val: 
"bert/encoder/layer_5/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_5/intermediate/dense/kernel" + string_val: "bert/encoder/layer_5/intermediate/dense/bias" + string_val: "bert/encoder/layer_5/output/dense/kernel" + string_val: "bert/encoder/layer_5/output/dense/bias" + string_val: "bert/encoder/layer_5/output/LayerNorm/beta" + string_val: "bert/encoder/layer_5/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_6/attention/self/query/kernel" + string_val: "bert/encoder/layer_6/attention/self/query/bias" + string_val: "bert/encoder/layer_6/attention/self/key/kernel" + string_val: "bert/encoder/layer_6/attention/self/key/bias" + string_val: "bert/encoder/layer_6/attention/self/value/kernel" + string_val: "bert/encoder/layer_6/attention/self/value/bias" + string_val: "bert/encoder/layer_6/attention/output/dense/kernel" + string_val: "bert/encoder/layer_6/attention/output/dense/bias" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_6/intermediate/dense/kernel" + string_val: "bert/encoder/layer_6/intermediate/dense/bias" + string_val: "bert/encoder/layer_6/output/dense/kernel" + string_val: "bert/encoder/layer_6/output/dense/bias" + string_val: "bert/encoder/layer_6/output/LayerNorm/beta" + string_val: "bert/encoder/layer_6/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_7/attention/self/query/kernel" + string_val: "bert/encoder/layer_7/attention/self/query/bias" + string_val: "bert/encoder/layer_7/attention/self/key/kernel" + string_val: "bert/encoder/layer_7/attention/self/key/bias" + string_val: "bert/encoder/layer_7/attention/self/value/kernel" + string_val: "bert/encoder/layer_7/attention/self/value/bias" + string_val: "bert/encoder/layer_7/attention/output/dense/kernel" + string_val: "bert/encoder/layer_7/attention/output/dense/bias" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_7/intermediate/dense/kernel" + string_val: "bert/encoder/layer_7/intermediate/dense/bias" + string_val: "bert/encoder/layer_7/output/dense/kernel" + string_val: "bert/encoder/layer_7/output/dense/bias" + string_val: "bert/encoder/layer_7/output/LayerNorm/beta" + string_val: "bert/encoder/layer_7/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_8/attention/self/query/kernel" + string_val: "bert/encoder/layer_8/attention/self/query/bias" + string_val: "bert/encoder/layer_8/attention/self/key/kernel" + string_val: "bert/encoder/layer_8/attention/self/key/bias" + string_val: "bert/encoder/layer_8/attention/self/value/kernel" + string_val: "bert/encoder/layer_8/attention/self/value/bias" + string_val: "bert/encoder/layer_8/attention/output/dense/kernel" + string_val: "bert/encoder/layer_8/attention/output/dense/bias" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_8/intermediate/dense/kernel" + string_val: "bert/encoder/layer_8/intermediate/dense/bias" + string_val: "bert/encoder/layer_8/output/dense/kernel" + string_val: "bert/encoder/layer_8/output/dense/bias" + string_val: "bert/encoder/layer_8/output/LayerNorm/beta" + string_val: "bert/encoder/layer_8/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_9/attention/self/query/kernel" + string_val: 
"bert/encoder/layer_9/attention/self/query/bias" + string_val: "bert/encoder/layer_9/attention/self/key/kernel" + string_val: "bert/encoder/layer_9/attention/self/key/bias" + string_val: "bert/encoder/layer_9/attention/self/value/kernel" + string_val: "bert/encoder/layer_9/attention/self/value/bias" + string_val: "bert/encoder/layer_9/attention/output/dense/kernel" + string_val: "bert/encoder/layer_9/attention/output/dense/bias" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_9/intermediate/dense/kernel" + string_val: "bert/encoder/layer_9/intermediate/dense/bias" + string_val: "bert/encoder/layer_9/output/dense/kernel" + string_val: "bert/encoder/layer_9/output/dense/bias" + string_val: "bert/encoder/layer_9/output/LayerNorm/beta" + string_val: "bert/encoder/layer_9/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_10/attention/self/query/kernel" + string_val: "bert/encoder/layer_10/attention/self/query/bias" + string_val: "bert/encoder/layer_10/attention/self/key/kernel" + string_val: "bert/encoder/layer_10/attention/self/key/bias" + string_val: "bert/encoder/layer_10/attention/self/value/kernel" + string_val: "bert/encoder/layer_10/attention/self/value/bias" + string_val: "bert/encoder/layer_10/attention/output/dense/kernel" + string_val: "bert/encoder/layer_10/attention/output/dense/bias" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_10/intermediate/dense/kernel" + string_val: "bert/encoder/layer_10/intermediate/dense/bias" + string_val: "bert/encoder/layer_10/output/dense/kernel" + string_val: "bert/encoder/layer_10/output/dense/bias" + string_val: "bert/encoder/layer_10/output/LayerNorm/beta" + string_val: "bert/encoder/layer_10/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_11/attention/self/query/kernel" + string_val: "bert/encoder/layer_11/attention/self/query/bias" + string_val: "bert/encoder/layer_11/attention/self/key/kernel" + string_val: "bert/encoder/layer_11/attention/self/key/bias" + string_val: "bert/encoder/layer_11/attention/self/value/kernel" + string_val: "bert/encoder/layer_11/attention/self/value/bias" + string_val: "bert/encoder/layer_11/attention/output/dense/kernel" + string_val: "bert/encoder/layer_11/attention/output/dense/bias" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_11/intermediate/dense/kernel" + string_val: "bert/encoder/layer_11/intermediate/dense/bias" + string_val: "bert/encoder/layer_11/output/dense/kernel" + string_val: "bert/encoder/layer_11/output/dense/bias" + string_val: "bert/encoder/layer_11/output/LayerNorm/beta" + string_val: "bert/encoder/layer_11/output/LayerNorm/gamma" + string_val: "bert/pooler/dense/kernel" + string_val: "bert/pooler/dense/bias" + string_val: "output_weights" + string_val: "output_bias" + string_val: "bert/embeddings/word_embeddings/adam_m" + string_val: "bert/embeddings/word_embeddings/adam_v" + string_val: "bert/embeddings/token_type_embeddings/adam_m" + string_val: "bert/embeddings/token_type_embeddings/adam_v" + string_val: "bert/embeddings/position_embeddings/adam_m" + string_val: "bert/embeddings/position_embeddings/adam_v" + string_val: "bert/embeddings/LayerNorm/beta/adam_m" + string_val: 
"bert/embeddings/LayerNorm/beta/adam_v" + string_val: "bert/embeddings/LayerNorm/gamma/adam_m" + string_val: "bert/embeddings/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_0/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_0/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_0/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_0/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_0/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_0/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_0/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_0/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_0/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_0/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_0/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_0/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_0/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_0/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_0/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_0/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_0/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_0/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_0/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_0/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_1/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_1/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_1/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_1/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_1/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_1/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_1/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_1/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_1/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_1/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_1/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_1/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_1/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_1/attention/output/dense/bias/adam_v" + string_val: 
"bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_1/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_1/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_1/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_1/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_1/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_1/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_2/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_2/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_2/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_2/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_2/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_2/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_2/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_2/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_2/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_2/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_2/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_2/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_2/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_2/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_2/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_2/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_2/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_2/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_2/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_2/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_3/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_3/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_3/attention/self/query/bias/adam_m" + string_val: 
"bert/encoder/layer_3/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_3/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_3/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_3/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_3/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_3/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_3/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_3/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_3/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_3/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_3/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_3/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_3/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_3/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_3/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_3/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_3/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_4/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_4/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_4/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_4/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_4/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_4/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_4/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_4/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_4/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_4/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_4/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_4/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_4/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_4/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + string_val: 
"bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_4/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_4/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_4/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_4/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_4/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_4/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_5/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_5/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_5/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_5/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_5/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_5/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_5/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_5/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_5/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_5/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_5/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_5/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_5/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_5/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_5/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_5/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_5/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_5/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_5/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_5/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_6/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_6/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_6/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_6/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_6/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_6/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_6/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_6/attention/self/key/bias/adam_v" + string_val: 
"bert/encoder/layer_6/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_6/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_6/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_6/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_6/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_6/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_6/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_6/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_6/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_6/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_6/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_6/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_7/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_7/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_7/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_7/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_7/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_7/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_7/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_7/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_7/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_7/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_7/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_7/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_7/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_7/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_7/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_7/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_7/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_7/output/dense/kernel/adam_v" + string_val: 
"bert/encoder/layer_7/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_7/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_8/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_8/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_8/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_8/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_8/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_8/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_8/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_8/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_8/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_8/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_8/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_8/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_8/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_8/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_8/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_8/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_8/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_8/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_8/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_8/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_9/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_9/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_9/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_9/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_9/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_9/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_9/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_9/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_9/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_9/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_9/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_9/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + string_val: 
"bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_9/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_9/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_9/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_9/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_9/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_9/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_9/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_9/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_10/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_10/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_10/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_10/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_10/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_10/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_10/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_10/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_10/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_10/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_10/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_10/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_10/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_10/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_10/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_10/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_10/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_10/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_10/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_10/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + string_val: 
"bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_11/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_11/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_11/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_11/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_11/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_11/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_11/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_11/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_11/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_11/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_11/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_11/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_11/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_11/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_11/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_11/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_11/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_11/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_11/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_11/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + string_val: "bert/pooler/dense/kernel/adam_m" + string_val: "bert/pooler/dense/kernel/adam_v" + string_val: "bert/pooler/dense/bias/adam_m" + string_val: "bert/pooler/dense/bias/adam_v" + string_val: "output_weights/adam_m" + string_val: "output_weights/adam_v" + string_val: "output_bias/adam_m" + string_val: "output_bias/adam_v" + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/Shape" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 604 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/strided_slice/stack" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: 
"report_uninitialized_variables/boolean_mask/strided_slice/stack_1" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/strided_slice/stack_2" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/strided_slice" + op: "StridedSlice" + input: "report_uninitialized_variables/boolean_mask/Shape" + input: "report_uninitialized_variables/boolean_mask/strided_slice/stack" + input: "report_uninitialized_variables/boolean_mask/strided_slice/stack_1" + input: "report_uninitialized_variables/boolean_mask/strided_slice/stack_2" + device: "/device:CPU:0" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/Prod/reduction_indices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/Prod" + op: "Prod" + input: "report_uninitialized_variables/boolean_mask/strided_slice" + input: "report_uninitialized_variables/boolean_mask/Prod/reduction_indices" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/Shape_1" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 604 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/strided_slice_1/stack" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: 
"report_uninitialized_variables/boolean_mask/strided_slice_1/stack_1" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/strided_slice_1/stack_2" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/strided_slice_1" + op: "StridedSlice" + input: "report_uninitialized_variables/boolean_mask/Shape_1" + input: "report_uninitialized_variables/boolean_mask/strided_slice_1/stack" + input: "report_uninitialized_variables/boolean_mask/strided_slice_1/stack_1" + input: "report_uninitialized_variables/boolean_mask/strided_slice_1/stack_2" + device: "/device:CPU:0" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 1 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/Shape_2" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 604 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/strided_slice_2/stack" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/strided_slice_2/stack_1" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/strided_slice_2/stack_2" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/strided_slice_2" + op: "StridedSlice" + input: "report_uninitialized_variables/boolean_mask/Shape_2" 
+ input: "report_uninitialized_variables/boolean_mask/strided_slice_2/stack" + input: "report_uninitialized_variables/boolean_mask/strided_slice_2/stack_1" + input: "report_uninitialized_variables/boolean_mask/strided_slice_2/stack_2" + device: "/device:CPU:0" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 1 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/concat/values_1" + op: "Pack" + input: "report_uninitialized_variables/boolean_mask/Prod" + device: "/device:CPU:0" + attr { + key: "N" + value { + i: 1 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "axis" + value { + i: 0 + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/concat/axis" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/concat" + op: "ConcatV2" + input: "report_uninitialized_variables/boolean_mask/strided_slice_1" + input: "report_uninitialized_variables/boolean_mask/concat/values_1" + input: "report_uninitialized_variables/boolean_mask/strided_slice_2" + input: "report_uninitialized_variables/boolean_mask/concat/axis" + device: "/device:CPU:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/Reshape" + op: "Reshape" + input: "report_uninitialized_variables/Const" + input: "report_uninitialized_variables/boolean_mask/concat" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 604 + } + } + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/Reshape_1/shape" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -1 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/Reshape_1" + op: "Reshape" + input: "report_uninitialized_variables/LogicalNot" + input: "report_uninitialized_variables/boolean_mask/Reshape_1/shape" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_BOOL + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 604 + } + } + } + } + } +} +node { + name: 
"report_uninitialized_variables/boolean_mask/Where" + op: "Where" + input: "report_uninitialized_variables/boolean_mask/Reshape_1" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_BOOL + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/Squeeze" + op: "Squeeze" + input: "report_uninitialized_variables/boolean_mask/Where" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + } + } + } + attr { + key: "squeeze_dims" + value { + list { + i: 1 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/GatherV2/axis" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/GatherV2" + op: "GatherV2" + input: "report_uninitialized_variables/boolean_mask/Reshape" + input: "report_uninitialized_variables/boolean_mask/Squeeze" + input: "report_uninitialized_variables/boolean_mask/GatherV2/axis" + device: "/device:CPU:0" + attr { + key: "Taxis" + value { + type: DT_INT32 + } + } + attr { + key: "Tindices" + value { + type: DT_INT64 + } + } + attr { + key: "Tparams" + value { + type: DT_STRING + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + } + } + } + attr { + key: "batch_dims" + value { + i: 0 + } + } +} +node { + name: "report_uninitialized_resources/Const" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "concat" + op: "ConcatV2" + input: "report_uninitialized_variables/boolean_mask/GatherV2" + input: "report_uninitialized_resources/Const" + input: "concat/axis" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "report_uninitialized_variables_1/VarIsInitializedOp" + op: "VarIsInitializedOp" + input: "global_step" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized" + op: "IsVariableInitialized" + input: "bert/embeddings/word_embeddings" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_1" + op: "IsVariableInitialized" + input: 
"bert/embeddings/token_type_embeddings" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_2" + op: "IsVariableInitialized" + input: "bert/embeddings/position_embeddings" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_3" + op: "IsVariableInitialized" + input: "bert/embeddings/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_4" + op: "IsVariableInitialized" + input: "bert/embeddings/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_5" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_6" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_7" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_8" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_9" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} 
+node { + name: "report_uninitialized_variables_1/IsVariableInitialized_10" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_11" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_12" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_13" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_14" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_15" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_16" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_17" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_18" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/dense/bias" + attr { + key: "_class" 
+ value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_19" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_20" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_21" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_22" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_23" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_24" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_25" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_26" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + 
} + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_27" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_28" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_29" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_30" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_31" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_32" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_33" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_34" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_35" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/LayerNorm/beta" + attr { + key: "_class" + 
value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_36" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_37" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_38" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_39" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_40" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_41" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_42" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_43" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { 
+ type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_44" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_45" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_46" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_47" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_48" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_49" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_50" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_51" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_52" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/LayerNorm/gamma" + attr { + key: 
"_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_53" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_54" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_55" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_56" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_57" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_58" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_59" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_60" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + 
key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_61" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_62" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_63" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_64" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_65" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_66" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_67" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_68" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_69" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/query/kernel" + 
attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_70" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_71" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_72" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_73" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_74" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_75" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_76" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_77" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + 
} + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_78" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_79" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_80" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_81" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_82" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_83" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_84" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_85" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_86" + op: "IsVariableInitialized" + input: 
"bert/encoder/layer_5/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_87" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_88" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_89" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_90" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_91" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_92" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_93" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_94" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma" + } + } + } 
+ attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_95" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_96" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_97" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_98" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_99" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_100" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_101" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_102" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_103" + op: 
"IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_104" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_105" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_106" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_107" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_108" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_109" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_110" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_111" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_6/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_112" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_113" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_114" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_115" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_116" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_117" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_118" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_119" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: 
"report_uninitialized_variables_1/IsVariableInitialized_120" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_121" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_122" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_123" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_124" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_125" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_126" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_127" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_128" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/intermediate/dense/bias" + attr { 
+ key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_129" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_130" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_131" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_132" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_133" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_134" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_135" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_136" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } 
+ } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_137" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_138" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_139" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_140" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_141" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_142" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_143" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_144" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_145" + op: "IsVariableInitialized" + input: 
"bert/encoder/layer_8/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_146" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_147" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_148" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_149" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_150" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_151" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_152" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_153" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + 
} + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_154" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_155" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_156" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_157" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_158" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_159" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_160" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_161" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_162" + op: 
"IsVariableInitialized" + input: "bert/encoder/layer_9/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_163" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_164" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_165" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_166" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_167" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_168" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_169" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_170" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias" + } + } + } + attr { + 
key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_171" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_172" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_173" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_174" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_175" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_176" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_177" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_178" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: 
"report_uninitialized_variables_1/IsVariableInitialized_179" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_180" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_181" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_182" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_183" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_184" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_185" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_186" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_187" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/dense/kernel" + attr { + key: "_class" + value { 
+ list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_188" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_189" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_190" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_191" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_192" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_193" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_194" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_195" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + 
value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_196" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_197" + op: "IsVariableInitialized" + input: "bert/pooler/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_198" + op: "IsVariableInitialized" + input: "bert/pooler/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_199" + op: "IsVariableInitialized" + input: "output_weights" + attr { + key: "_class" + value { + list { + s: "loc:@output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_200" + op: "IsVariableInitialized" + input: "output_bias" + attr { + key: "_class" + value { + list { + s: "loc:@output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_201" + op: "IsVariableInitialized" + input: "bert/embeddings/word_embeddings/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_202" + op: "IsVariableInitialized" + input: "bert/embeddings/word_embeddings/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_203" + op: "IsVariableInitialized" + input: "bert/embeddings/token_type_embeddings/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_204" + op: "IsVariableInitialized" + input: "bert/embeddings/token_type_embeddings/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { 
+ name: "report_uninitialized_variables_1/IsVariableInitialized_205" + op: "IsVariableInitialized" + input: "bert/embeddings/position_embeddings/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_206" + op: "IsVariableInitialized" + input: "bert/embeddings/position_embeddings/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_207" + op: "IsVariableInitialized" + input: "bert/embeddings/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_208" + op: "IsVariableInitialized" + input: "bert/embeddings/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_209" + op: "IsVariableInitialized" + input: "bert/embeddings/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_210" + op: "IsVariableInitialized" + input: "bert/embeddings/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_211" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_212" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_213" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_0/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_214" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_215" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_216" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_217" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_218" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_219" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_220" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_221" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_m" + } + } + } + attr { 
+ key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_222" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_223" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_224" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_225" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_226" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_227" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_228" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_229" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_230" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_231" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_232" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_233" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_234" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_235" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_236" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_237" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } 
+ } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_238" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_239" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_240" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_241" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_242" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_243" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_244" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_245" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_246" + op: "IsVariableInitialized" + 
input: "bert/encoder/layer_1/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_247" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_248" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_249" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_250" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_251" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_252" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_253" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_254" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/value/bias/adam_v" + attr { + key: 
"_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_255" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_256" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_257" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_258" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_259" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_260" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_261" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_262" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + 
value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_263" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_264" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_265" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_266" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_267" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_268" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_269" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_270" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + 
list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_271" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_272" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_273" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_274" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_275" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_276" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_277" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_278" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { 
+ name: "report_uninitialized_variables_1/IsVariableInitialized_279" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_280" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_281" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_282" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_283" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_284" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_285" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_286" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_287" + op: 
"IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_288" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_289" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_290" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_291" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_292" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_293" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_294" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_295" + op: 
"IsVariableInitialized" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_296" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_297" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_298" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_299" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_300" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_301" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_302" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_303" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_304" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_305" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_306" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_307" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_308" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_309" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_310" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_311" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_312" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_313" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_314" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_315" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_316" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_317" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_318" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_319" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + 
key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_320" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_321" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_322" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_323" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_324" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_325" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_326" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_327" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + 
value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_328" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_329" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_330" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_331" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_332" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_333" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_334" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_335" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_336" + op: "IsVariableInitialized" + input: 
"bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_337" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_338" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_339" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_340" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_341" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_342" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_343" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_344" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + 
list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_345" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_346" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_347" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_348" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_349" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_350" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_351" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_352" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_353" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_354" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_355" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_356" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_357" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_358" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_359" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_360" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_361" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_362" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_363" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_364" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_365" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_366" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_367" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_368" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + 
key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_369" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_370" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_371" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_372" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_373" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_374" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_375" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_376" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: 
"report_uninitialized_variables_1/IsVariableInitialized_377" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_378" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_379" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_380" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_381" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_382" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_383" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_384" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_385" + op: 
"IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_386" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_387" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_388" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_389" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_390" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_391" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_392" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_393" + op: 
"IsVariableInitialized" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_394" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_395" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_396" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_397" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_398" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_399" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_400" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_401" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_402" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_403" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_404" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_405" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_406" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_407" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_408" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_409" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_m" + } + } + } + attr { + 
key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_410" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_411" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_412" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_413" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_414" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_415" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_416" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_417" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } 
+ } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_418" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_419" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_420" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_421" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_422" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_423" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_424" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_425" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: 
"dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_426" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_427" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_428" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_429" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_430" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_431" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_432" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_433" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_434" + op: "IsVariableInitialized" + input: 
"bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_435" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_436" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_437" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_438" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_439" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_440" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_441" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_442" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/key/bias/adam_v" + attr { + key: "_class" + 
value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_443" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_444" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_445" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_446" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_447" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_448" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_449" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_450" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_451" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_452" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_453" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_454" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_455" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_456" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_457" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_458" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_459" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_460" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_461" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_462" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_463" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_464" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_465" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_466" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: 
"dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_467" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_468" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_469" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_470" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_471" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_472" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_473" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_474" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: 
"report_uninitialized_variables_1/IsVariableInitialized_475" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_476" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_477" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_478" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_479" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_480" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_481" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_482" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: 
"report_uninitialized_variables_1/IsVariableInitialized_483" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_484" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_485" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_486" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_487" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_488" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_489" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_490" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: 
"report_uninitialized_variables_1/IsVariableInitialized_491" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_492" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_493" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_494" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_495" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_496" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_497" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_498" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_499" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_m" + attr { 
+ key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_500" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_501" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_502" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_503" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_504" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_505" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_506" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_507" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_508" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_509" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_510" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_511" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_512" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_513" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_514" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_515" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_516" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_517" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_518" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_519" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_520" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_521" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_522" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_523" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_9/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_524" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_525" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_526" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_527" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_528" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_529" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_530" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_531" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + 
attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_532" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_533" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_534" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_535" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_536" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_537" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_538" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_539" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node 
{ + name: "report_uninitialized_variables_1/IsVariableInitialized_540" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_541" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_542" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_543" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_544" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_545" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_546" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_547" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: 
"report_uninitialized_variables_1/IsVariableInitialized_548" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_549" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_550" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_551" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_552" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_553" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_554" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_555" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: 
"report_uninitialized_variables_1/IsVariableInitialized_556" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_557" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_558" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_559" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_560" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_561" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_562" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_563" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_564" + op: "IsVariableInitialized" + input: 
"bert/encoder/layer_11/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_565" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_566" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_567" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_568" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_569" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_570" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_571" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_572" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_v" + attr 
{ + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_573" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_574" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_575" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_576" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_577" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_578" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_579" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_580" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + 
value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_581" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_582" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_583" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_584" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_585" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_586" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_587" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_588" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_11/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_589" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_590" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_591" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_592" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_593" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_594" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_595" + op: "IsVariableInitialized" + input: "bert/pooler/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_596" + op: "IsVariableInitialized" + input: "bert/pooler/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + 
name: "report_uninitialized_variables_1/IsVariableInitialized_597" + op: "IsVariableInitialized" + input: "bert/pooler/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_598" + op: "IsVariableInitialized" + input: "bert/pooler/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_599" + op: "IsVariableInitialized" + input: "output_weights/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@output_weights/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_600" + op: "IsVariableInitialized" + input: "output_weights/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@output_weights/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_601" + op: "IsVariableInitialized" + input: "output_bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_602" + op: "IsVariableInitialized" + input: "output_bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/stack" + op: "Pack" + input: "report_uninitialized_variables_1/VarIsInitializedOp" + input: "report_uninitialized_variables_1/IsVariableInitialized" + input: "report_uninitialized_variables_1/IsVariableInitialized_1" + input: "report_uninitialized_variables_1/IsVariableInitialized_2" + input: "report_uninitialized_variables_1/IsVariableInitialized_3" + input: "report_uninitialized_variables_1/IsVariableInitialized_4" + input: "report_uninitialized_variables_1/IsVariableInitialized_5" + input: "report_uninitialized_variables_1/IsVariableInitialized_6" + input: "report_uninitialized_variables_1/IsVariableInitialized_7" + input: "report_uninitialized_variables_1/IsVariableInitialized_8" + input: "report_uninitialized_variables_1/IsVariableInitialized_9" + input: "report_uninitialized_variables_1/IsVariableInitialized_10" + input: "report_uninitialized_variables_1/IsVariableInitialized_11" + input: "report_uninitialized_variables_1/IsVariableInitialized_12" + input: "report_uninitialized_variables_1/IsVariableInitialized_13" + input: "report_uninitialized_variables_1/IsVariableInitialized_14" + input: "report_uninitialized_variables_1/IsVariableInitialized_15" + input: "report_uninitialized_variables_1/IsVariableInitialized_16" + input: 
"report_uninitialized_variables_1/IsVariableInitialized_17" + input: "report_uninitialized_variables_1/IsVariableInitialized_18" + input: "report_uninitialized_variables_1/IsVariableInitialized_19" + input: "report_uninitialized_variables_1/IsVariableInitialized_20" + input: "report_uninitialized_variables_1/IsVariableInitialized_21" + input: "report_uninitialized_variables_1/IsVariableInitialized_22" + input: "report_uninitialized_variables_1/IsVariableInitialized_23" + input: "report_uninitialized_variables_1/IsVariableInitialized_24" + input: "report_uninitialized_variables_1/IsVariableInitialized_25" + input: "report_uninitialized_variables_1/IsVariableInitialized_26" + input: "report_uninitialized_variables_1/IsVariableInitialized_27" + input: "report_uninitialized_variables_1/IsVariableInitialized_28" + input: "report_uninitialized_variables_1/IsVariableInitialized_29" + input: "report_uninitialized_variables_1/IsVariableInitialized_30" + input: "report_uninitialized_variables_1/IsVariableInitialized_31" + input: "report_uninitialized_variables_1/IsVariableInitialized_32" + input: "report_uninitialized_variables_1/IsVariableInitialized_33" + input: "report_uninitialized_variables_1/IsVariableInitialized_34" + input: "report_uninitialized_variables_1/IsVariableInitialized_35" + input: "report_uninitialized_variables_1/IsVariableInitialized_36" + input: "report_uninitialized_variables_1/IsVariableInitialized_37" + input: "report_uninitialized_variables_1/IsVariableInitialized_38" + input: "report_uninitialized_variables_1/IsVariableInitialized_39" + input: "report_uninitialized_variables_1/IsVariableInitialized_40" + input: "report_uninitialized_variables_1/IsVariableInitialized_41" + input: "report_uninitialized_variables_1/IsVariableInitialized_42" + input: "report_uninitialized_variables_1/IsVariableInitialized_43" + input: "report_uninitialized_variables_1/IsVariableInitialized_44" + input: "report_uninitialized_variables_1/IsVariableInitialized_45" + input: "report_uninitialized_variables_1/IsVariableInitialized_46" + input: "report_uninitialized_variables_1/IsVariableInitialized_47" + input: "report_uninitialized_variables_1/IsVariableInitialized_48" + input: "report_uninitialized_variables_1/IsVariableInitialized_49" + input: "report_uninitialized_variables_1/IsVariableInitialized_50" + input: "report_uninitialized_variables_1/IsVariableInitialized_51" + input: "report_uninitialized_variables_1/IsVariableInitialized_52" + input: "report_uninitialized_variables_1/IsVariableInitialized_53" + input: "report_uninitialized_variables_1/IsVariableInitialized_54" + input: "report_uninitialized_variables_1/IsVariableInitialized_55" + input: "report_uninitialized_variables_1/IsVariableInitialized_56" + input: "report_uninitialized_variables_1/IsVariableInitialized_57" + input: "report_uninitialized_variables_1/IsVariableInitialized_58" + input: "report_uninitialized_variables_1/IsVariableInitialized_59" + input: "report_uninitialized_variables_1/IsVariableInitialized_60" + input: "report_uninitialized_variables_1/IsVariableInitialized_61" + input: "report_uninitialized_variables_1/IsVariableInitialized_62" + input: "report_uninitialized_variables_1/IsVariableInitialized_63" + input: "report_uninitialized_variables_1/IsVariableInitialized_64" + input: "report_uninitialized_variables_1/IsVariableInitialized_65" + input: "report_uninitialized_variables_1/IsVariableInitialized_66" + input: "report_uninitialized_variables_1/IsVariableInitialized_67" + input: 
"report_uninitialized_variables_1/IsVariableInitialized_68" + input: "report_uninitialized_variables_1/IsVariableInitialized_69" + input: "report_uninitialized_variables_1/IsVariableInitialized_70" + input: "report_uninitialized_variables_1/IsVariableInitialized_71" + input: "report_uninitialized_variables_1/IsVariableInitialized_72" + input: "report_uninitialized_variables_1/IsVariableInitialized_73" + input: "report_uninitialized_variables_1/IsVariableInitialized_74" + input: "report_uninitialized_variables_1/IsVariableInitialized_75" + input: "report_uninitialized_variables_1/IsVariableInitialized_76" + input: "report_uninitialized_variables_1/IsVariableInitialized_77" + input: "report_uninitialized_variables_1/IsVariableInitialized_78" + input: "report_uninitialized_variables_1/IsVariableInitialized_79" + input: "report_uninitialized_variables_1/IsVariableInitialized_80" + input: "report_uninitialized_variables_1/IsVariableInitialized_81" + input: "report_uninitialized_variables_1/IsVariableInitialized_82" + input: "report_uninitialized_variables_1/IsVariableInitialized_83" + input: "report_uninitialized_variables_1/IsVariableInitialized_84" + input: "report_uninitialized_variables_1/IsVariableInitialized_85" + input: "report_uninitialized_variables_1/IsVariableInitialized_86" + input: "report_uninitialized_variables_1/IsVariableInitialized_87" + input: "report_uninitialized_variables_1/IsVariableInitialized_88" + input: "report_uninitialized_variables_1/IsVariableInitialized_89" + input: "report_uninitialized_variables_1/IsVariableInitialized_90" + input: "report_uninitialized_variables_1/IsVariableInitialized_91" + input: "report_uninitialized_variables_1/IsVariableInitialized_92" + input: "report_uninitialized_variables_1/IsVariableInitialized_93" + input: "report_uninitialized_variables_1/IsVariableInitialized_94" + input: "report_uninitialized_variables_1/IsVariableInitialized_95" + input: "report_uninitialized_variables_1/IsVariableInitialized_96" + input: "report_uninitialized_variables_1/IsVariableInitialized_97" + input: "report_uninitialized_variables_1/IsVariableInitialized_98" + input: "report_uninitialized_variables_1/IsVariableInitialized_99" + input: "report_uninitialized_variables_1/IsVariableInitialized_100" + input: "report_uninitialized_variables_1/IsVariableInitialized_101" + input: "report_uninitialized_variables_1/IsVariableInitialized_102" + input: "report_uninitialized_variables_1/IsVariableInitialized_103" + input: "report_uninitialized_variables_1/IsVariableInitialized_104" + input: "report_uninitialized_variables_1/IsVariableInitialized_105" + input: "report_uninitialized_variables_1/IsVariableInitialized_106" + input: "report_uninitialized_variables_1/IsVariableInitialized_107" + input: "report_uninitialized_variables_1/IsVariableInitialized_108" + input: "report_uninitialized_variables_1/IsVariableInitialized_109" + input: "report_uninitialized_variables_1/IsVariableInitialized_110" + input: "report_uninitialized_variables_1/IsVariableInitialized_111" + input: "report_uninitialized_variables_1/IsVariableInitialized_112" + input: "report_uninitialized_variables_1/IsVariableInitialized_113" + input: "report_uninitialized_variables_1/IsVariableInitialized_114" + input: "report_uninitialized_variables_1/IsVariableInitialized_115" + input: "report_uninitialized_variables_1/IsVariableInitialized_116" + input: "report_uninitialized_variables_1/IsVariableInitialized_117" + input: "report_uninitialized_variables_1/IsVariableInitialized_118" + input: 
"report_uninitialized_variables_1/IsVariableInitialized_119" + input: "report_uninitialized_variables_1/IsVariableInitialized_120" + input: "report_uninitialized_variables_1/IsVariableInitialized_121" + input: "report_uninitialized_variables_1/IsVariableInitialized_122" + input: "report_uninitialized_variables_1/IsVariableInitialized_123" + input: "report_uninitialized_variables_1/IsVariableInitialized_124" + input: "report_uninitialized_variables_1/IsVariableInitialized_125" + input: "report_uninitialized_variables_1/IsVariableInitialized_126" + input: "report_uninitialized_variables_1/IsVariableInitialized_127" + input: "report_uninitialized_variables_1/IsVariableInitialized_128" + input: "report_uninitialized_variables_1/IsVariableInitialized_129" + input: "report_uninitialized_variables_1/IsVariableInitialized_130" + input: "report_uninitialized_variables_1/IsVariableInitialized_131" + input: "report_uninitialized_variables_1/IsVariableInitialized_132" + input: "report_uninitialized_variables_1/IsVariableInitialized_133" + input: "report_uninitialized_variables_1/IsVariableInitialized_134" + input: "report_uninitialized_variables_1/IsVariableInitialized_135" + input: "report_uninitialized_variables_1/IsVariableInitialized_136" + input: "report_uninitialized_variables_1/IsVariableInitialized_137" + input: "report_uninitialized_variables_1/IsVariableInitialized_138" + input: "report_uninitialized_variables_1/IsVariableInitialized_139" + input: "report_uninitialized_variables_1/IsVariableInitialized_140" + input: "report_uninitialized_variables_1/IsVariableInitialized_141" + input: "report_uninitialized_variables_1/IsVariableInitialized_142" + input: "report_uninitialized_variables_1/IsVariableInitialized_143" + input: "report_uninitialized_variables_1/IsVariableInitialized_144" + input: "report_uninitialized_variables_1/IsVariableInitialized_145" + input: "report_uninitialized_variables_1/IsVariableInitialized_146" + input: "report_uninitialized_variables_1/IsVariableInitialized_147" + input: "report_uninitialized_variables_1/IsVariableInitialized_148" + input: "report_uninitialized_variables_1/IsVariableInitialized_149" + input: "report_uninitialized_variables_1/IsVariableInitialized_150" + input: "report_uninitialized_variables_1/IsVariableInitialized_151" + input: "report_uninitialized_variables_1/IsVariableInitialized_152" + input: "report_uninitialized_variables_1/IsVariableInitialized_153" + input: "report_uninitialized_variables_1/IsVariableInitialized_154" + input: "report_uninitialized_variables_1/IsVariableInitialized_155" + input: "report_uninitialized_variables_1/IsVariableInitialized_156" + input: "report_uninitialized_variables_1/IsVariableInitialized_157" + input: "report_uninitialized_variables_1/IsVariableInitialized_158" + input: "report_uninitialized_variables_1/IsVariableInitialized_159" + input: "report_uninitialized_variables_1/IsVariableInitialized_160" + input: "report_uninitialized_variables_1/IsVariableInitialized_161" + input: "report_uninitialized_variables_1/IsVariableInitialized_162" + input: "report_uninitialized_variables_1/IsVariableInitialized_163" + input: "report_uninitialized_variables_1/IsVariableInitialized_164" + input: "report_uninitialized_variables_1/IsVariableInitialized_165" + input: "report_uninitialized_variables_1/IsVariableInitialized_166" + input: "report_uninitialized_variables_1/IsVariableInitialized_167" + input: "report_uninitialized_variables_1/IsVariableInitialized_168" + input: 
"report_uninitialized_variables_1/IsVariableInitialized_169" + input: "report_uninitialized_variables_1/IsVariableInitialized_170" + input: "report_uninitialized_variables_1/IsVariableInitialized_171" + input: "report_uninitialized_variables_1/IsVariableInitialized_172" + input: "report_uninitialized_variables_1/IsVariableInitialized_173" + input: "report_uninitialized_variables_1/IsVariableInitialized_174" + input: "report_uninitialized_variables_1/IsVariableInitialized_175" + input: "report_uninitialized_variables_1/IsVariableInitialized_176" + input: "report_uninitialized_variables_1/IsVariableInitialized_177" + input: "report_uninitialized_variables_1/IsVariableInitialized_178" + input: "report_uninitialized_variables_1/IsVariableInitialized_179" + input: "report_uninitialized_variables_1/IsVariableInitialized_180" + input: "report_uninitialized_variables_1/IsVariableInitialized_181" + input: "report_uninitialized_variables_1/IsVariableInitialized_182" + input: "report_uninitialized_variables_1/IsVariableInitialized_183" + input: "report_uninitialized_variables_1/IsVariableInitialized_184" + input: "report_uninitialized_variables_1/IsVariableInitialized_185" + input: "report_uninitialized_variables_1/IsVariableInitialized_186" + input: "report_uninitialized_variables_1/IsVariableInitialized_187" + input: "report_uninitialized_variables_1/IsVariableInitialized_188" + input: "report_uninitialized_variables_1/IsVariableInitialized_189" + input: "report_uninitialized_variables_1/IsVariableInitialized_190" + input: "report_uninitialized_variables_1/IsVariableInitialized_191" + input: "report_uninitialized_variables_1/IsVariableInitialized_192" + input: "report_uninitialized_variables_1/IsVariableInitialized_193" + input: "report_uninitialized_variables_1/IsVariableInitialized_194" + input: "report_uninitialized_variables_1/IsVariableInitialized_195" + input: "report_uninitialized_variables_1/IsVariableInitialized_196" + input: "report_uninitialized_variables_1/IsVariableInitialized_197" + input: "report_uninitialized_variables_1/IsVariableInitialized_198" + input: "report_uninitialized_variables_1/IsVariableInitialized_199" + input: "report_uninitialized_variables_1/IsVariableInitialized_200" + input: "report_uninitialized_variables_1/IsVariableInitialized_201" + input: "report_uninitialized_variables_1/IsVariableInitialized_202" + input: "report_uninitialized_variables_1/IsVariableInitialized_203" + input: "report_uninitialized_variables_1/IsVariableInitialized_204" + input: "report_uninitialized_variables_1/IsVariableInitialized_205" + input: "report_uninitialized_variables_1/IsVariableInitialized_206" + input: "report_uninitialized_variables_1/IsVariableInitialized_207" + input: "report_uninitialized_variables_1/IsVariableInitialized_208" + input: "report_uninitialized_variables_1/IsVariableInitialized_209" + input: "report_uninitialized_variables_1/IsVariableInitialized_210" + input: "report_uninitialized_variables_1/IsVariableInitialized_211" + input: "report_uninitialized_variables_1/IsVariableInitialized_212" + input: "report_uninitialized_variables_1/IsVariableInitialized_213" + input: "report_uninitialized_variables_1/IsVariableInitialized_214" + input: "report_uninitialized_variables_1/IsVariableInitialized_215" + input: "report_uninitialized_variables_1/IsVariableInitialized_216" + input: "report_uninitialized_variables_1/IsVariableInitialized_217" + input: "report_uninitialized_variables_1/IsVariableInitialized_218" + input: 
"report_uninitialized_variables_1/IsVariableInitialized_219" + input: "report_uninitialized_variables_1/IsVariableInitialized_220" + input: "report_uninitialized_variables_1/IsVariableInitialized_221" + input: "report_uninitialized_variables_1/IsVariableInitialized_222" + input: "report_uninitialized_variables_1/IsVariableInitialized_223" + input: "report_uninitialized_variables_1/IsVariableInitialized_224" + input: "report_uninitialized_variables_1/IsVariableInitialized_225" + input: "report_uninitialized_variables_1/IsVariableInitialized_226" + input: "report_uninitialized_variables_1/IsVariableInitialized_227" + input: "report_uninitialized_variables_1/IsVariableInitialized_228" + input: "report_uninitialized_variables_1/IsVariableInitialized_229" + input: "report_uninitialized_variables_1/IsVariableInitialized_230" + input: "report_uninitialized_variables_1/IsVariableInitialized_231" + input: "report_uninitialized_variables_1/IsVariableInitialized_232" + input: "report_uninitialized_variables_1/IsVariableInitialized_233" + input: "report_uninitialized_variables_1/IsVariableInitialized_234" + input: "report_uninitialized_variables_1/IsVariableInitialized_235" + input: "report_uninitialized_variables_1/IsVariableInitialized_236" + input: "report_uninitialized_variables_1/IsVariableInitialized_237" + input: "report_uninitialized_variables_1/IsVariableInitialized_238" + input: "report_uninitialized_variables_1/IsVariableInitialized_239" + input: "report_uninitialized_variables_1/IsVariableInitialized_240" + input: "report_uninitialized_variables_1/IsVariableInitialized_241" + input: "report_uninitialized_variables_1/IsVariableInitialized_242" + input: "report_uninitialized_variables_1/IsVariableInitialized_243" + input: "report_uninitialized_variables_1/IsVariableInitialized_244" + input: "report_uninitialized_variables_1/IsVariableInitialized_245" + input: "report_uninitialized_variables_1/IsVariableInitialized_246" + input: "report_uninitialized_variables_1/IsVariableInitialized_247" + input: "report_uninitialized_variables_1/IsVariableInitialized_248" + input: "report_uninitialized_variables_1/IsVariableInitialized_249" + input: "report_uninitialized_variables_1/IsVariableInitialized_250" + input: "report_uninitialized_variables_1/IsVariableInitialized_251" + input: "report_uninitialized_variables_1/IsVariableInitialized_252" + input: "report_uninitialized_variables_1/IsVariableInitialized_253" + input: "report_uninitialized_variables_1/IsVariableInitialized_254" + input: "report_uninitialized_variables_1/IsVariableInitialized_255" + input: "report_uninitialized_variables_1/IsVariableInitialized_256" + input: "report_uninitialized_variables_1/IsVariableInitialized_257" + input: "report_uninitialized_variables_1/IsVariableInitialized_258" + input: "report_uninitialized_variables_1/IsVariableInitialized_259" + input: "report_uninitialized_variables_1/IsVariableInitialized_260" + input: "report_uninitialized_variables_1/IsVariableInitialized_261" + input: "report_uninitialized_variables_1/IsVariableInitialized_262" + input: "report_uninitialized_variables_1/IsVariableInitialized_263" + input: "report_uninitialized_variables_1/IsVariableInitialized_264" + input: "report_uninitialized_variables_1/IsVariableInitialized_265" + input: "report_uninitialized_variables_1/IsVariableInitialized_266" + input: "report_uninitialized_variables_1/IsVariableInitialized_267" + input: "report_uninitialized_variables_1/IsVariableInitialized_268" + input: 
"report_uninitialized_variables_1/IsVariableInitialized_269" + input: "report_uninitialized_variables_1/IsVariableInitialized_270" + input: "report_uninitialized_variables_1/IsVariableInitialized_271" + input: "report_uninitialized_variables_1/IsVariableInitialized_272" + input: "report_uninitialized_variables_1/IsVariableInitialized_273" + input: "report_uninitialized_variables_1/IsVariableInitialized_274" + input: "report_uninitialized_variables_1/IsVariableInitialized_275" + input: "report_uninitialized_variables_1/IsVariableInitialized_276" + input: "report_uninitialized_variables_1/IsVariableInitialized_277" + input: "report_uninitialized_variables_1/IsVariableInitialized_278" + input: "report_uninitialized_variables_1/IsVariableInitialized_279" + input: "report_uninitialized_variables_1/IsVariableInitialized_280" + input: "report_uninitialized_variables_1/IsVariableInitialized_281" + input: "report_uninitialized_variables_1/IsVariableInitialized_282" + input: "report_uninitialized_variables_1/IsVariableInitialized_283" + input: "report_uninitialized_variables_1/IsVariableInitialized_284" + input: "report_uninitialized_variables_1/IsVariableInitialized_285" + input: "report_uninitialized_variables_1/IsVariableInitialized_286" + input: "report_uninitialized_variables_1/IsVariableInitialized_287" + input: "report_uninitialized_variables_1/IsVariableInitialized_288" + input: "report_uninitialized_variables_1/IsVariableInitialized_289" + input: "report_uninitialized_variables_1/IsVariableInitialized_290" + input: "report_uninitialized_variables_1/IsVariableInitialized_291" + input: "report_uninitialized_variables_1/IsVariableInitialized_292" + input: "report_uninitialized_variables_1/IsVariableInitialized_293" + input: "report_uninitialized_variables_1/IsVariableInitialized_294" + input: "report_uninitialized_variables_1/IsVariableInitialized_295" + input: "report_uninitialized_variables_1/IsVariableInitialized_296" + input: "report_uninitialized_variables_1/IsVariableInitialized_297" + input: "report_uninitialized_variables_1/IsVariableInitialized_298" + input: "report_uninitialized_variables_1/IsVariableInitialized_299" + input: "report_uninitialized_variables_1/IsVariableInitialized_300" + input: "report_uninitialized_variables_1/IsVariableInitialized_301" + input: "report_uninitialized_variables_1/IsVariableInitialized_302" + input: "report_uninitialized_variables_1/IsVariableInitialized_303" + input: "report_uninitialized_variables_1/IsVariableInitialized_304" + input: "report_uninitialized_variables_1/IsVariableInitialized_305" + input: "report_uninitialized_variables_1/IsVariableInitialized_306" + input: "report_uninitialized_variables_1/IsVariableInitialized_307" + input: "report_uninitialized_variables_1/IsVariableInitialized_308" + input: "report_uninitialized_variables_1/IsVariableInitialized_309" + input: "report_uninitialized_variables_1/IsVariableInitialized_310" + input: "report_uninitialized_variables_1/IsVariableInitialized_311" + input: "report_uninitialized_variables_1/IsVariableInitialized_312" + input: "report_uninitialized_variables_1/IsVariableInitialized_313" + input: "report_uninitialized_variables_1/IsVariableInitialized_314" + input: "report_uninitialized_variables_1/IsVariableInitialized_315" + input: "report_uninitialized_variables_1/IsVariableInitialized_316" + input: "report_uninitialized_variables_1/IsVariableInitialized_317" + input: "report_uninitialized_variables_1/IsVariableInitialized_318" + input: 
"report_uninitialized_variables_1/IsVariableInitialized_319" + input: "report_uninitialized_variables_1/IsVariableInitialized_320" + input: "report_uninitialized_variables_1/IsVariableInitialized_321" + input: "report_uninitialized_variables_1/IsVariableInitialized_322" + input: "report_uninitialized_variables_1/IsVariableInitialized_323" + input: "report_uninitialized_variables_1/IsVariableInitialized_324" + input: "report_uninitialized_variables_1/IsVariableInitialized_325" + input: "report_uninitialized_variables_1/IsVariableInitialized_326" + input: "report_uninitialized_variables_1/IsVariableInitialized_327" + input: "report_uninitialized_variables_1/IsVariableInitialized_328" + input: "report_uninitialized_variables_1/IsVariableInitialized_329" + input: "report_uninitialized_variables_1/IsVariableInitialized_330" + input: "report_uninitialized_variables_1/IsVariableInitialized_331" + input: "report_uninitialized_variables_1/IsVariableInitialized_332" + input: "report_uninitialized_variables_1/IsVariableInitialized_333" + input: "report_uninitialized_variables_1/IsVariableInitialized_334" + input: "report_uninitialized_variables_1/IsVariableInitialized_335" + input: "report_uninitialized_variables_1/IsVariableInitialized_336" + input: "report_uninitialized_variables_1/IsVariableInitialized_337" + input: "report_uninitialized_variables_1/IsVariableInitialized_338" + input: "report_uninitialized_variables_1/IsVariableInitialized_339" + input: "report_uninitialized_variables_1/IsVariableInitialized_340" + input: "report_uninitialized_variables_1/IsVariableInitialized_341" + input: "report_uninitialized_variables_1/IsVariableInitialized_342" + input: "report_uninitialized_variables_1/IsVariableInitialized_343" + input: "report_uninitialized_variables_1/IsVariableInitialized_344" + input: "report_uninitialized_variables_1/IsVariableInitialized_345" + input: "report_uninitialized_variables_1/IsVariableInitialized_346" + input: "report_uninitialized_variables_1/IsVariableInitialized_347" + input: "report_uninitialized_variables_1/IsVariableInitialized_348" + input: "report_uninitialized_variables_1/IsVariableInitialized_349" + input: "report_uninitialized_variables_1/IsVariableInitialized_350" + input: "report_uninitialized_variables_1/IsVariableInitialized_351" + input: "report_uninitialized_variables_1/IsVariableInitialized_352" + input: "report_uninitialized_variables_1/IsVariableInitialized_353" + input: "report_uninitialized_variables_1/IsVariableInitialized_354" + input: "report_uninitialized_variables_1/IsVariableInitialized_355" + input: "report_uninitialized_variables_1/IsVariableInitialized_356" + input: "report_uninitialized_variables_1/IsVariableInitialized_357" + input: "report_uninitialized_variables_1/IsVariableInitialized_358" + input: "report_uninitialized_variables_1/IsVariableInitialized_359" + input: "report_uninitialized_variables_1/IsVariableInitialized_360" + input: "report_uninitialized_variables_1/IsVariableInitialized_361" + input: "report_uninitialized_variables_1/IsVariableInitialized_362" + input: "report_uninitialized_variables_1/IsVariableInitialized_363" + input: "report_uninitialized_variables_1/IsVariableInitialized_364" + input: "report_uninitialized_variables_1/IsVariableInitialized_365" + input: "report_uninitialized_variables_1/IsVariableInitialized_366" + input: "report_uninitialized_variables_1/IsVariableInitialized_367" + input: "report_uninitialized_variables_1/IsVariableInitialized_368" + input: 
"report_uninitialized_variables_1/IsVariableInitialized_369" + input: "report_uninitialized_variables_1/IsVariableInitialized_370" + input: "report_uninitialized_variables_1/IsVariableInitialized_371" + input: "report_uninitialized_variables_1/IsVariableInitialized_372" + input: "report_uninitialized_variables_1/IsVariableInitialized_373" + input: "report_uninitialized_variables_1/IsVariableInitialized_374" + input: "report_uninitialized_variables_1/IsVariableInitialized_375" + input: "report_uninitialized_variables_1/IsVariableInitialized_376" + input: "report_uninitialized_variables_1/IsVariableInitialized_377" + input: "report_uninitialized_variables_1/IsVariableInitialized_378" + input: "report_uninitialized_variables_1/IsVariableInitialized_379" + input: "report_uninitialized_variables_1/IsVariableInitialized_380" + input: "report_uninitialized_variables_1/IsVariableInitialized_381" + input: "report_uninitialized_variables_1/IsVariableInitialized_382" + input: "report_uninitialized_variables_1/IsVariableInitialized_383" + input: "report_uninitialized_variables_1/IsVariableInitialized_384" + input: "report_uninitialized_variables_1/IsVariableInitialized_385" + input: "report_uninitialized_variables_1/IsVariableInitialized_386" + input: "report_uninitialized_variables_1/IsVariableInitialized_387" + input: "report_uninitialized_variables_1/IsVariableInitialized_388" + input: "report_uninitialized_variables_1/IsVariableInitialized_389" + input: "report_uninitialized_variables_1/IsVariableInitialized_390" + input: "report_uninitialized_variables_1/IsVariableInitialized_391" + input: "report_uninitialized_variables_1/IsVariableInitialized_392" + input: "report_uninitialized_variables_1/IsVariableInitialized_393" + input: "report_uninitialized_variables_1/IsVariableInitialized_394" + input: "report_uninitialized_variables_1/IsVariableInitialized_395" + input: "report_uninitialized_variables_1/IsVariableInitialized_396" + input: "report_uninitialized_variables_1/IsVariableInitialized_397" + input: "report_uninitialized_variables_1/IsVariableInitialized_398" + input: "report_uninitialized_variables_1/IsVariableInitialized_399" + input: "report_uninitialized_variables_1/IsVariableInitialized_400" + input: "report_uninitialized_variables_1/IsVariableInitialized_401" + input: "report_uninitialized_variables_1/IsVariableInitialized_402" + input: "report_uninitialized_variables_1/IsVariableInitialized_403" + input: "report_uninitialized_variables_1/IsVariableInitialized_404" + input: "report_uninitialized_variables_1/IsVariableInitialized_405" + input: "report_uninitialized_variables_1/IsVariableInitialized_406" + input: "report_uninitialized_variables_1/IsVariableInitialized_407" + input: "report_uninitialized_variables_1/IsVariableInitialized_408" + input: "report_uninitialized_variables_1/IsVariableInitialized_409" + input: "report_uninitialized_variables_1/IsVariableInitialized_410" + input: "report_uninitialized_variables_1/IsVariableInitialized_411" + input: "report_uninitialized_variables_1/IsVariableInitialized_412" + input: "report_uninitialized_variables_1/IsVariableInitialized_413" + input: "report_uninitialized_variables_1/IsVariableInitialized_414" + input: "report_uninitialized_variables_1/IsVariableInitialized_415" + input: "report_uninitialized_variables_1/IsVariableInitialized_416" + input: "report_uninitialized_variables_1/IsVariableInitialized_417" + input: "report_uninitialized_variables_1/IsVariableInitialized_418" + input: 
"report_uninitialized_variables_1/IsVariableInitialized_419" + input: "report_uninitialized_variables_1/IsVariableInitialized_420" + input: "report_uninitialized_variables_1/IsVariableInitialized_421" + input: "report_uninitialized_variables_1/IsVariableInitialized_422" + input: "report_uninitialized_variables_1/IsVariableInitialized_423" + input: "report_uninitialized_variables_1/IsVariableInitialized_424" + input: "report_uninitialized_variables_1/IsVariableInitialized_425" + input: "report_uninitialized_variables_1/IsVariableInitialized_426" + input: "report_uninitialized_variables_1/IsVariableInitialized_427" + input: "report_uninitialized_variables_1/IsVariableInitialized_428" + input: "report_uninitialized_variables_1/IsVariableInitialized_429" + input: "report_uninitialized_variables_1/IsVariableInitialized_430" + input: "report_uninitialized_variables_1/IsVariableInitialized_431" + input: "report_uninitialized_variables_1/IsVariableInitialized_432" + input: "report_uninitialized_variables_1/IsVariableInitialized_433" + input: "report_uninitialized_variables_1/IsVariableInitialized_434" + input: "report_uninitialized_variables_1/IsVariableInitialized_435" + input: "report_uninitialized_variables_1/IsVariableInitialized_436" + input: "report_uninitialized_variables_1/IsVariableInitialized_437" + input: "report_uninitialized_variables_1/IsVariableInitialized_438" + input: "report_uninitialized_variables_1/IsVariableInitialized_439" + input: "report_uninitialized_variables_1/IsVariableInitialized_440" + input: "report_uninitialized_variables_1/IsVariableInitialized_441" + input: "report_uninitialized_variables_1/IsVariableInitialized_442" + input: "report_uninitialized_variables_1/IsVariableInitialized_443" + input: "report_uninitialized_variables_1/IsVariableInitialized_444" + input: "report_uninitialized_variables_1/IsVariableInitialized_445" + input: "report_uninitialized_variables_1/IsVariableInitialized_446" + input: "report_uninitialized_variables_1/IsVariableInitialized_447" + input: "report_uninitialized_variables_1/IsVariableInitialized_448" + input: "report_uninitialized_variables_1/IsVariableInitialized_449" + input: "report_uninitialized_variables_1/IsVariableInitialized_450" + input: "report_uninitialized_variables_1/IsVariableInitialized_451" + input: "report_uninitialized_variables_1/IsVariableInitialized_452" + input: "report_uninitialized_variables_1/IsVariableInitialized_453" + input: "report_uninitialized_variables_1/IsVariableInitialized_454" + input: "report_uninitialized_variables_1/IsVariableInitialized_455" + input: "report_uninitialized_variables_1/IsVariableInitialized_456" + input: "report_uninitialized_variables_1/IsVariableInitialized_457" + input: "report_uninitialized_variables_1/IsVariableInitialized_458" + input: "report_uninitialized_variables_1/IsVariableInitialized_459" + input: "report_uninitialized_variables_1/IsVariableInitialized_460" + input: "report_uninitialized_variables_1/IsVariableInitialized_461" + input: "report_uninitialized_variables_1/IsVariableInitialized_462" + input: "report_uninitialized_variables_1/IsVariableInitialized_463" + input: "report_uninitialized_variables_1/IsVariableInitialized_464" + input: "report_uninitialized_variables_1/IsVariableInitialized_465" + input: "report_uninitialized_variables_1/IsVariableInitialized_466" + input: "report_uninitialized_variables_1/IsVariableInitialized_467" + input: "report_uninitialized_variables_1/IsVariableInitialized_468" + input: 
"report_uninitialized_variables_1/IsVariableInitialized_469" + input: "report_uninitialized_variables_1/IsVariableInitialized_470" + input: "report_uninitialized_variables_1/IsVariableInitialized_471" + input: "report_uninitialized_variables_1/IsVariableInitialized_472" + input: "report_uninitialized_variables_1/IsVariableInitialized_473" + input: "report_uninitialized_variables_1/IsVariableInitialized_474" + input: "report_uninitialized_variables_1/IsVariableInitialized_475" + input: "report_uninitialized_variables_1/IsVariableInitialized_476" + input: "report_uninitialized_variables_1/IsVariableInitialized_477" + input: "report_uninitialized_variables_1/IsVariableInitialized_478" + input: "report_uninitialized_variables_1/IsVariableInitialized_479" + input: "report_uninitialized_variables_1/IsVariableInitialized_480" + input: "report_uninitialized_variables_1/IsVariableInitialized_481" + input: "report_uninitialized_variables_1/IsVariableInitialized_482" + input: "report_uninitialized_variables_1/IsVariableInitialized_483" + input: "report_uninitialized_variables_1/IsVariableInitialized_484" + input: "report_uninitialized_variables_1/IsVariableInitialized_485" + input: "report_uninitialized_variables_1/IsVariableInitialized_486" + input: "report_uninitialized_variables_1/IsVariableInitialized_487" + input: "report_uninitialized_variables_1/IsVariableInitialized_488" + input: "report_uninitialized_variables_1/IsVariableInitialized_489" + input: "report_uninitialized_variables_1/IsVariableInitialized_490" + input: "report_uninitialized_variables_1/IsVariableInitialized_491" + input: "report_uninitialized_variables_1/IsVariableInitialized_492" + input: "report_uninitialized_variables_1/IsVariableInitialized_493" + input: "report_uninitialized_variables_1/IsVariableInitialized_494" + input: "report_uninitialized_variables_1/IsVariableInitialized_495" + input: "report_uninitialized_variables_1/IsVariableInitialized_496" + input: "report_uninitialized_variables_1/IsVariableInitialized_497" + input: "report_uninitialized_variables_1/IsVariableInitialized_498" + input: "report_uninitialized_variables_1/IsVariableInitialized_499" + input: "report_uninitialized_variables_1/IsVariableInitialized_500" + input: "report_uninitialized_variables_1/IsVariableInitialized_501" + input: "report_uninitialized_variables_1/IsVariableInitialized_502" + input: "report_uninitialized_variables_1/IsVariableInitialized_503" + input: "report_uninitialized_variables_1/IsVariableInitialized_504" + input: "report_uninitialized_variables_1/IsVariableInitialized_505" + input: "report_uninitialized_variables_1/IsVariableInitialized_506" + input: "report_uninitialized_variables_1/IsVariableInitialized_507" + input: "report_uninitialized_variables_1/IsVariableInitialized_508" + input: "report_uninitialized_variables_1/IsVariableInitialized_509" + input: "report_uninitialized_variables_1/IsVariableInitialized_510" + input: "report_uninitialized_variables_1/IsVariableInitialized_511" + input: "report_uninitialized_variables_1/IsVariableInitialized_512" + input: "report_uninitialized_variables_1/IsVariableInitialized_513" + input: "report_uninitialized_variables_1/IsVariableInitialized_514" + input: "report_uninitialized_variables_1/IsVariableInitialized_515" + input: "report_uninitialized_variables_1/IsVariableInitialized_516" + input: "report_uninitialized_variables_1/IsVariableInitialized_517" + input: "report_uninitialized_variables_1/IsVariableInitialized_518" + input: 
"report_uninitialized_variables_1/IsVariableInitialized_519" + input: "report_uninitialized_variables_1/IsVariableInitialized_520" + input: "report_uninitialized_variables_1/IsVariableInitialized_521" + input: "report_uninitialized_variables_1/IsVariableInitialized_522" + input: "report_uninitialized_variables_1/IsVariableInitialized_523" + input: "report_uninitialized_variables_1/IsVariableInitialized_524" + input: "report_uninitialized_variables_1/IsVariableInitialized_525" + input: "report_uninitialized_variables_1/IsVariableInitialized_526" + input: "report_uninitialized_variables_1/IsVariableInitialized_527" + input: "report_uninitialized_variables_1/IsVariableInitialized_528" + input: "report_uninitialized_variables_1/IsVariableInitialized_529" + input: "report_uninitialized_variables_1/IsVariableInitialized_530" + input: "report_uninitialized_variables_1/IsVariableInitialized_531" + input: "report_uninitialized_variables_1/IsVariableInitialized_532" + input: "report_uninitialized_variables_1/IsVariableInitialized_533" + input: "report_uninitialized_variables_1/IsVariableInitialized_534" + input: "report_uninitialized_variables_1/IsVariableInitialized_535" + input: "report_uninitialized_variables_1/IsVariableInitialized_536" + input: "report_uninitialized_variables_1/IsVariableInitialized_537" + input: "report_uninitialized_variables_1/IsVariableInitialized_538" + input: "report_uninitialized_variables_1/IsVariableInitialized_539" + input: "report_uninitialized_variables_1/IsVariableInitialized_540" + input: "report_uninitialized_variables_1/IsVariableInitialized_541" + input: "report_uninitialized_variables_1/IsVariableInitialized_542" + input: "report_uninitialized_variables_1/IsVariableInitialized_543" + input: "report_uninitialized_variables_1/IsVariableInitialized_544" + input: "report_uninitialized_variables_1/IsVariableInitialized_545" + input: "report_uninitialized_variables_1/IsVariableInitialized_546" + input: "report_uninitialized_variables_1/IsVariableInitialized_547" + input: "report_uninitialized_variables_1/IsVariableInitialized_548" + input: "report_uninitialized_variables_1/IsVariableInitialized_549" + input: "report_uninitialized_variables_1/IsVariableInitialized_550" + input: "report_uninitialized_variables_1/IsVariableInitialized_551" + input: "report_uninitialized_variables_1/IsVariableInitialized_552" + input: "report_uninitialized_variables_1/IsVariableInitialized_553" + input: "report_uninitialized_variables_1/IsVariableInitialized_554" + input: "report_uninitialized_variables_1/IsVariableInitialized_555" + input: "report_uninitialized_variables_1/IsVariableInitialized_556" + input: "report_uninitialized_variables_1/IsVariableInitialized_557" + input: "report_uninitialized_variables_1/IsVariableInitialized_558" + input: "report_uninitialized_variables_1/IsVariableInitialized_559" + input: "report_uninitialized_variables_1/IsVariableInitialized_560" + input: "report_uninitialized_variables_1/IsVariableInitialized_561" + input: "report_uninitialized_variables_1/IsVariableInitialized_562" + input: "report_uninitialized_variables_1/IsVariableInitialized_563" + input: "report_uninitialized_variables_1/IsVariableInitialized_564" + input: "report_uninitialized_variables_1/IsVariableInitialized_565" + input: "report_uninitialized_variables_1/IsVariableInitialized_566" + input: "report_uninitialized_variables_1/IsVariableInitialized_567" + input: "report_uninitialized_variables_1/IsVariableInitialized_568" + input: 
"report_uninitialized_variables_1/IsVariableInitialized_569" + input: "report_uninitialized_variables_1/IsVariableInitialized_570" + input: "report_uninitialized_variables_1/IsVariableInitialized_571" + input: "report_uninitialized_variables_1/IsVariableInitialized_572" + input: "report_uninitialized_variables_1/IsVariableInitialized_573" + input: "report_uninitialized_variables_1/IsVariableInitialized_574" + input: "report_uninitialized_variables_1/IsVariableInitialized_575" + input: "report_uninitialized_variables_1/IsVariableInitialized_576" + input: "report_uninitialized_variables_1/IsVariableInitialized_577" + input: "report_uninitialized_variables_1/IsVariableInitialized_578" + input: "report_uninitialized_variables_1/IsVariableInitialized_579" + input: "report_uninitialized_variables_1/IsVariableInitialized_580" + input: "report_uninitialized_variables_1/IsVariableInitialized_581" + input: "report_uninitialized_variables_1/IsVariableInitialized_582" + input: "report_uninitialized_variables_1/IsVariableInitialized_583" + input: "report_uninitialized_variables_1/IsVariableInitialized_584" + input: "report_uninitialized_variables_1/IsVariableInitialized_585" + input: "report_uninitialized_variables_1/IsVariableInitialized_586" + input: "report_uninitialized_variables_1/IsVariableInitialized_587" + input: "report_uninitialized_variables_1/IsVariableInitialized_588" + input: "report_uninitialized_variables_1/IsVariableInitialized_589" + input: "report_uninitialized_variables_1/IsVariableInitialized_590" + input: "report_uninitialized_variables_1/IsVariableInitialized_591" + input: "report_uninitialized_variables_1/IsVariableInitialized_592" + input: "report_uninitialized_variables_1/IsVariableInitialized_593" + input: "report_uninitialized_variables_1/IsVariableInitialized_594" + input: "report_uninitialized_variables_1/IsVariableInitialized_595" + input: "report_uninitialized_variables_1/IsVariableInitialized_596" + input: "report_uninitialized_variables_1/IsVariableInitialized_597" + input: "report_uninitialized_variables_1/IsVariableInitialized_598" + input: "report_uninitialized_variables_1/IsVariableInitialized_599" + input: "report_uninitialized_variables_1/IsVariableInitialized_600" + input: "report_uninitialized_variables_1/IsVariableInitialized_601" + input: "report_uninitialized_variables_1/IsVariableInitialized_602" + device: "/device:CPU:0" + attr { + key: "N" + value { + i: 604 + } + } + attr { + key: "T" + value { + type: DT_BOOL + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 604 + } + } + } + } + } + attr { + key: "axis" + value { + i: 0 + } + } +} +node { + name: "report_uninitialized_variables_1/LogicalNot" + op: "LogicalNot" + input: "report_uninitialized_variables_1/stack" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 604 + } + } + } + } + } +} +node { + name: "report_uninitialized_variables_1/Const" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 604 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 604 + } + } + string_val: "global_step" + string_val: "bert/embeddings/word_embeddings" + string_val: "bert/embeddings/token_type_embeddings" + string_val: "bert/embeddings/position_embeddings" + string_val: "bert/embeddings/LayerNorm/beta" + string_val: 
"bert/embeddings/LayerNorm/gamma" + string_val: "bert/encoder/layer_0/attention/self/query/kernel" + string_val: "bert/encoder/layer_0/attention/self/query/bias" + string_val: "bert/encoder/layer_0/attention/self/key/kernel" + string_val: "bert/encoder/layer_0/attention/self/key/bias" + string_val: "bert/encoder/layer_0/attention/self/value/kernel" + string_val: "bert/encoder/layer_0/attention/self/value/bias" + string_val: "bert/encoder/layer_0/attention/output/dense/kernel" + string_val: "bert/encoder/layer_0/attention/output/dense/bias" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_0/intermediate/dense/kernel" + string_val: "bert/encoder/layer_0/intermediate/dense/bias" + string_val: "bert/encoder/layer_0/output/dense/kernel" + string_val: "bert/encoder/layer_0/output/dense/bias" + string_val: "bert/encoder/layer_0/output/LayerNorm/beta" + string_val: "bert/encoder/layer_0/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_1/attention/self/query/kernel" + string_val: "bert/encoder/layer_1/attention/self/query/bias" + string_val: "bert/encoder/layer_1/attention/self/key/kernel" + string_val: "bert/encoder/layer_1/attention/self/key/bias" + string_val: "bert/encoder/layer_1/attention/self/value/kernel" + string_val: "bert/encoder/layer_1/attention/self/value/bias" + string_val: "bert/encoder/layer_1/attention/output/dense/kernel" + string_val: "bert/encoder/layer_1/attention/output/dense/bias" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_1/intermediate/dense/kernel" + string_val: "bert/encoder/layer_1/intermediate/dense/bias" + string_val: "bert/encoder/layer_1/output/dense/kernel" + string_val: "bert/encoder/layer_1/output/dense/bias" + string_val: "bert/encoder/layer_1/output/LayerNorm/beta" + string_val: "bert/encoder/layer_1/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_2/attention/self/query/kernel" + string_val: "bert/encoder/layer_2/attention/self/query/bias" + string_val: "bert/encoder/layer_2/attention/self/key/kernel" + string_val: "bert/encoder/layer_2/attention/self/key/bias" + string_val: "bert/encoder/layer_2/attention/self/value/kernel" + string_val: "bert/encoder/layer_2/attention/self/value/bias" + string_val: "bert/encoder/layer_2/attention/output/dense/kernel" + string_val: "bert/encoder/layer_2/attention/output/dense/bias" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_2/intermediate/dense/kernel" + string_val: "bert/encoder/layer_2/intermediate/dense/bias" + string_val: "bert/encoder/layer_2/output/dense/kernel" + string_val: "bert/encoder/layer_2/output/dense/bias" + string_val: "bert/encoder/layer_2/output/LayerNorm/beta" + string_val: "bert/encoder/layer_2/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_3/attention/self/query/kernel" + string_val: "bert/encoder/layer_3/attention/self/query/bias" + string_val: "bert/encoder/layer_3/attention/self/key/kernel" + string_val: "bert/encoder/layer_3/attention/self/key/bias" + string_val: "bert/encoder/layer_3/attention/self/value/kernel" + string_val: "bert/encoder/layer_3/attention/self/value/bias" + string_val: "bert/encoder/layer_3/attention/output/dense/kernel" + string_val: 
"bert/encoder/layer_3/attention/output/dense/bias" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_3/intermediate/dense/kernel" + string_val: "bert/encoder/layer_3/intermediate/dense/bias" + string_val: "bert/encoder/layer_3/output/dense/kernel" + string_val: "bert/encoder/layer_3/output/dense/bias" + string_val: "bert/encoder/layer_3/output/LayerNorm/beta" + string_val: "bert/encoder/layer_3/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_4/attention/self/query/kernel" + string_val: "bert/encoder/layer_4/attention/self/query/bias" + string_val: "bert/encoder/layer_4/attention/self/key/kernel" + string_val: "bert/encoder/layer_4/attention/self/key/bias" + string_val: "bert/encoder/layer_4/attention/self/value/kernel" + string_val: "bert/encoder/layer_4/attention/self/value/bias" + string_val: "bert/encoder/layer_4/attention/output/dense/kernel" + string_val: "bert/encoder/layer_4/attention/output/dense/bias" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_4/intermediate/dense/kernel" + string_val: "bert/encoder/layer_4/intermediate/dense/bias" + string_val: "bert/encoder/layer_4/output/dense/kernel" + string_val: "bert/encoder/layer_4/output/dense/bias" + string_val: "bert/encoder/layer_4/output/LayerNorm/beta" + string_val: "bert/encoder/layer_4/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_5/attention/self/query/kernel" + string_val: "bert/encoder/layer_5/attention/self/query/bias" + string_val: "bert/encoder/layer_5/attention/self/key/kernel" + string_val: "bert/encoder/layer_5/attention/self/key/bias" + string_val: "bert/encoder/layer_5/attention/self/value/kernel" + string_val: "bert/encoder/layer_5/attention/self/value/bias" + string_val: "bert/encoder/layer_5/attention/output/dense/kernel" + string_val: "bert/encoder/layer_5/attention/output/dense/bias" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_5/intermediate/dense/kernel" + string_val: "bert/encoder/layer_5/intermediate/dense/bias" + string_val: "bert/encoder/layer_5/output/dense/kernel" + string_val: "bert/encoder/layer_5/output/dense/bias" + string_val: "bert/encoder/layer_5/output/LayerNorm/beta" + string_val: "bert/encoder/layer_5/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_6/attention/self/query/kernel" + string_val: "bert/encoder/layer_6/attention/self/query/bias" + string_val: "bert/encoder/layer_6/attention/self/key/kernel" + string_val: "bert/encoder/layer_6/attention/self/key/bias" + string_val: "bert/encoder/layer_6/attention/self/value/kernel" + string_val: "bert/encoder/layer_6/attention/self/value/bias" + string_val: "bert/encoder/layer_6/attention/output/dense/kernel" + string_val: "bert/encoder/layer_6/attention/output/dense/bias" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_6/intermediate/dense/kernel" + string_val: "bert/encoder/layer_6/intermediate/dense/bias" + string_val: "bert/encoder/layer_6/output/dense/kernel" + string_val: "bert/encoder/layer_6/output/dense/bias" + string_val: "bert/encoder/layer_6/output/LayerNorm/beta" + string_val: 
"bert/encoder/layer_6/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_7/attention/self/query/kernel" + string_val: "bert/encoder/layer_7/attention/self/query/bias" + string_val: "bert/encoder/layer_7/attention/self/key/kernel" + string_val: "bert/encoder/layer_7/attention/self/key/bias" + string_val: "bert/encoder/layer_7/attention/self/value/kernel" + string_val: "bert/encoder/layer_7/attention/self/value/bias" + string_val: "bert/encoder/layer_7/attention/output/dense/kernel" + string_val: "bert/encoder/layer_7/attention/output/dense/bias" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_7/intermediate/dense/kernel" + string_val: "bert/encoder/layer_7/intermediate/dense/bias" + string_val: "bert/encoder/layer_7/output/dense/kernel" + string_val: "bert/encoder/layer_7/output/dense/bias" + string_val: "bert/encoder/layer_7/output/LayerNorm/beta" + string_val: "bert/encoder/layer_7/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_8/attention/self/query/kernel" + string_val: "bert/encoder/layer_8/attention/self/query/bias" + string_val: "bert/encoder/layer_8/attention/self/key/kernel" + string_val: "bert/encoder/layer_8/attention/self/key/bias" + string_val: "bert/encoder/layer_8/attention/self/value/kernel" + string_val: "bert/encoder/layer_8/attention/self/value/bias" + string_val: "bert/encoder/layer_8/attention/output/dense/kernel" + string_val: "bert/encoder/layer_8/attention/output/dense/bias" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_8/intermediate/dense/kernel" + string_val: "bert/encoder/layer_8/intermediate/dense/bias" + string_val: "bert/encoder/layer_8/output/dense/kernel" + string_val: "bert/encoder/layer_8/output/dense/bias" + string_val: "bert/encoder/layer_8/output/LayerNorm/beta" + string_val: "bert/encoder/layer_8/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_9/attention/self/query/kernel" + string_val: "bert/encoder/layer_9/attention/self/query/bias" + string_val: "bert/encoder/layer_9/attention/self/key/kernel" + string_val: "bert/encoder/layer_9/attention/self/key/bias" + string_val: "bert/encoder/layer_9/attention/self/value/kernel" + string_val: "bert/encoder/layer_9/attention/self/value/bias" + string_val: "bert/encoder/layer_9/attention/output/dense/kernel" + string_val: "bert/encoder/layer_9/attention/output/dense/bias" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_9/intermediate/dense/kernel" + string_val: "bert/encoder/layer_9/intermediate/dense/bias" + string_val: "bert/encoder/layer_9/output/dense/kernel" + string_val: "bert/encoder/layer_9/output/dense/bias" + string_val: "bert/encoder/layer_9/output/LayerNorm/beta" + string_val: "bert/encoder/layer_9/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_10/attention/self/query/kernel" + string_val: "bert/encoder/layer_10/attention/self/query/bias" + string_val: "bert/encoder/layer_10/attention/self/key/kernel" + string_val: "bert/encoder/layer_10/attention/self/key/bias" + string_val: "bert/encoder/layer_10/attention/self/value/kernel" + string_val: "bert/encoder/layer_10/attention/self/value/bias" + string_val: "bert/encoder/layer_10/attention/output/dense/kernel" + string_val: 
"bert/encoder/layer_10/attention/output/dense/bias" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_10/intermediate/dense/kernel" + string_val: "bert/encoder/layer_10/intermediate/dense/bias" + string_val: "bert/encoder/layer_10/output/dense/kernel" + string_val: "bert/encoder/layer_10/output/dense/bias" + string_val: "bert/encoder/layer_10/output/LayerNorm/beta" + string_val: "bert/encoder/layer_10/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_11/attention/self/query/kernel" + string_val: "bert/encoder/layer_11/attention/self/query/bias" + string_val: "bert/encoder/layer_11/attention/self/key/kernel" + string_val: "bert/encoder/layer_11/attention/self/key/bias" + string_val: "bert/encoder/layer_11/attention/self/value/kernel" + string_val: "bert/encoder/layer_11/attention/self/value/bias" + string_val: "bert/encoder/layer_11/attention/output/dense/kernel" + string_val: "bert/encoder/layer_11/attention/output/dense/bias" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_11/intermediate/dense/kernel" + string_val: "bert/encoder/layer_11/intermediate/dense/bias" + string_val: "bert/encoder/layer_11/output/dense/kernel" + string_val: "bert/encoder/layer_11/output/dense/bias" + string_val: "bert/encoder/layer_11/output/LayerNorm/beta" + string_val: "bert/encoder/layer_11/output/LayerNorm/gamma" + string_val: "bert/pooler/dense/kernel" + string_val: "bert/pooler/dense/bias" + string_val: "output_weights" + string_val: "output_bias" + string_val: "bert/embeddings/word_embeddings/adam_m" + string_val: "bert/embeddings/word_embeddings/adam_v" + string_val: "bert/embeddings/token_type_embeddings/adam_m" + string_val: "bert/embeddings/token_type_embeddings/adam_v" + string_val: "bert/embeddings/position_embeddings/adam_m" + string_val: "bert/embeddings/position_embeddings/adam_v" + string_val: "bert/embeddings/LayerNorm/beta/adam_m" + string_val: "bert/embeddings/LayerNorm/beta/adam_v" + string_val: "bert/embeddings/LayerNorm/gamma/adam_m" + string_val: "bert/embeddings/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_0/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_0/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_0/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_0/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_0/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_0/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_0/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_0/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_0/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_0/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_0/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_0/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_0/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_0/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + string_val: 
"bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_0/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_0/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_0/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_0/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_0/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_0/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_1/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_1/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_1/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_1/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_1/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_1/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_1/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_1/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_1/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_1/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_1/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_1/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_1/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_1/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_1/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_1/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_1/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_1/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_1/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_1/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_2/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_2/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_2/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_2/attention/self/query/bias/adam_v" + string_val: 
"bert/encoder/layer_2/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_2/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_2/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_2/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_2/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_2/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_2/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_2/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_2/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_2/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_2/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_2/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_2/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_2/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_2/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_2/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_3/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_3/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_3/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_3/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_3/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_3/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_3/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_3/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_3/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_3/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_3/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_3/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_3/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_3/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + string_val: 
"bert/encoder/layer_3/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_3/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_3/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_3/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_3/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_3/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_4/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_4/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_4/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_4/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_4/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_4/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_4/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_4/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_4/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_4/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_4/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_4/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_4/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_4/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_4/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_4/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_4/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_4/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_4/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_4/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_5/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_5/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_5/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_5/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_5/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_5/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_5/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_5/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_5/attention/self/value/kernel/adam_m" + string_val: 
"bert/encoder/layer_5/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_5/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_5/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_5/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_5/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_5/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_5/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_5/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_5/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_5/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_5/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_6/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_6/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_6/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_6/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_6/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_6/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_6/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_6/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_6/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_6/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_6/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_6/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_6/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_6/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_6/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_6/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_6/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_6/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_6/output/dense/bias/adam_m" + string_val: 
"bert/encoder/layer_6/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_7/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_7/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_7/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_7/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_7/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_7/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_7/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_7/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_7/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_7/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_7/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_7/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_7/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_7/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_7/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_7/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_7/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_7/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_7/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_7/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_8/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_8/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_8/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_8/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_8/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_8/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_8/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_8/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_8/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_8/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_8/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_8/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + 
string_val: "bert/encoder/layer_8/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_8/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_8/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_8/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_8/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_8/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_8/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_8/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_9/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_9/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_9/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_9/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_9/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_9/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_9/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_9/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_9/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_9/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_9/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_9/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_9/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_9/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_9/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_9/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_9/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_9/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_9/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_9/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_10/attention/self/query/kernel/adam_m" + 
string_val: "bert/encoder/layer_10/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_10/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_10/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_10/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_10/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_10/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_10/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_10/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_10/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_10/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_10/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_10/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_10/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_10/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_10/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_10/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_10/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_10/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_10/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_11/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_11/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_11/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_11/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_11/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_11/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_11/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_11/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_11/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_11/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_11/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_11/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_11/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_11/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + string_val: 
"bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_11/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_11/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_11/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_11/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_11/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_11/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + string_val: "bert/pooler/dense/kernel/adam_m" + string_val: "bert/pooler/dense/kernel/adam_v" + string_val: "bert/pooler/dense/bias/adam_m" + string_val: "bert/pooler/dense/bias/adam_v" + string_val: "output_weights/adam_m" + string_val: "output_weights/adam_v" + string_val: "output_bias/adam_m" + string_val: "output_bias/adam_v" + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/Shape" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 604 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/strided_slice/stack" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/strided_slice/stack_1" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/strided_slice/stack_2" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/strided_slice" + op: "StridedSlice" + input: "report_uninitialized_variables_1/boolean_mask/Shape" + input: "report_uninitialized_variables_1/boolean_mask/strided_slice/stack" + input: "report_uninitialized_variables_1/boolean_mask/strided_slice/stack_1" + input: "report_uninitialized_variables_1/boolean_mask/strided_slice/stack_2" + device: "/device:CPU:0" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape 
{ + dim { + size: 1 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/Prod/reduction_indices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/Prod" + op: "Prod" + input: "report_uninitialized_variables_1/boolean_mask/strided_slice" + input: "report_uninitialized_variables_1/boolean_mask/Prod/reduction_indices" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/Shape_1" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 604 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/strided_slice_1/stack" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/strided_slice_1/stack_1" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/strided_slice_1/stack_2" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/strided_slice_1" + op: "StridedSlice" + input: "report_uninitialized_variables_1/boolean_mask/Shape_1" + input: "report_uninitialized_variables_1/boolean_mask/strided_slice_1/stack" + input: "report_uninitialized_variables_1/boolean_mask/strided_slice_1/stack_1" + input: "report_uninitialized_variables_1/boolean_mask/strided_slice_1/stack_2" + device: "/device:CPU:0" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } 
+ } + attr { + key: "begin_mask" + value { + i: 1 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/Shape_2" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 604 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/strided_slice_2/stack" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/strided_slice_2/stack_1" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/strided_slice_2/stack_2" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/strided_slice_2" + op: "StridedSlice" + input: "report_uninitialized_variables_1/boolean_mask/Shape_2" + input: "report_uninitialized_variables_1/boolean_mask/strided_slice_2/stack" + input: "report_uninitialized_variables_1/boolean_mask/strided_slice_2/stack_1" + input: "report_uninitialized_variables_1/boolean_mask/strided_slice_2/stack_2" + device: "/device:CPU:0" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 1 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/concat/values_1" + op: "Pack" + input: "report_uninitialized_variables_1/boolean_mask/Prod" + device: "/device:CPU:0" + attr { + key: "N" + value { + i: 1 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "axis" + value { + i: 0 + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/concat/axis" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + 
} + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/concat" + op: "ConcatV2" + input: "report_uninitialized_variables_1/boolean_mask/strided_slice_1" + input: "report_uninitialized_variables_1/boolean_mask/concat/values_1" + input: "report_uninitialized_variables_1/boolean_mask/strided_slice_2" + input: "report_uninitialized_variables_1/boolean_mask/concat/axis" + device: "/device:CPU:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/Reshape" + op: "Reshape" + input: "report_uninitialized_variables_1/Const" + input: "report_uninitialized_variables_1/boolean_mask/concat" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 604 + } + } + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/Reshape_1/shape" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -1 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/Reshape_1" + op: "Reshape" + input: "report_uninitialized_variables_1/LogicalNot" + input: "report_uninitialized_variables_1/boolean_mask/Reshape_1/shape" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_BOOL + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 604 + } + } + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/Where" + op: "Where" + input: "report_uninitialized_variables_1/boolean_mask/Reshape_1" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_BOOL + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/Squeeze" + op: "Squeeze" + input: "report_uninitialized_variables_1/boolean_mask/Where" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + } + } + } + attr { + key: "squeeze_dims" + value { + list { + i: 1 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/GatherV2/axis" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/GatherV2" + op: "GatherV2" + input: "report_uninitialized_variables_1/boolean_mask/Reshape" + input: "report_uninitialized_variables_1/boolean_mask/Squeeze" + input: "report_uninitialized_variables_1/boolean_mask/GatherV2/axis" + device: "/device:CPU:0" + 
attr { + key: "Taxis" + value { + type: DT_INT32 + } + } + attr { + key: "Tindices" + value { + type: DT_INT64 + } + } + attr { + key: "Tparams" + value { + type: DT_STRING + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + } + } + } + attr { + key: "batch_dims" + value { + i: 0 + } + } +} +node { + name: "report_uninitialized_resources_1/Const" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "concat_1/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "concat_1" + op: "ConcatV2" + input: "report_uninitialized_variables_1/boolean_mask/GatherV2" + input: "report_uninitialized_resources_1/Const" + input: "concat_1/axis" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "init_2" + op: "NoOp" +} +node { + name: "init_all_tables" + op: "NoOp" +} +node { + name: "init_3" + op: "NoOp" +} +node { + name: "group_deps_3" + op: "NoOp" + input: "^init_2" + input: "^init_3" + input: "^init_all_tables" +} +node { + name: "Merge/MergeSummary" + op: "MergeSummary" + input: "loss_1" + attr { + key: "N" + value { + i: 1 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "save/filename/input" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "model" + } + } + } +} +node { + name: "save/filename" + op: "PlaceholderWithDefault" + input: "save/filename/input" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "shape" + value { + shape { + } + } + } +} +node { + name: "save/Const" + op: "PlaceholderWithDefault" + input: "save/filename" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "shape" + value { + shape { + } + } + } +} +node { + name: "save/StringJoin/inputs_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "_temp_823f412404fa4e59893c248d5a436737/part" + } + } + } +} +node { + name: "save/StringJoin" + op: "StringJoin" + input: "save/Const" + input: "save/StringJoin/inputs_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "separator" + value { + s: "" + } + } +} +node { + name: "save/num_shards" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { 
+ key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "save/ShardedFilename/shard" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "save/ShardedFilename" + op: "ShardedFilename" + input: "save/StringJoin" + input: "save/ShardedFilename/shard" + input: "save/num_shards" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "save/SaveV2/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 604 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 604 + } + } + string_val: "bert/embeddings/LayerNorm/beta" + string_val: "bert/embeddings/LayerNorm/beta/adam_m" + string_val: "bert/embeddings/LayerNorm/beta/adam_v" + string_val: "bert/embeddings/LayerNorm/gamma" + string_val: "bert/embeddings/LayerNorm/gamma/adam_m" + string_val: "bert/embeddings/LayerNorm/gamma/adam_v" + string_val: "bert/embeddings/position_embeddings" + string_val: "bert/embeddings/position_embeddings/adam_m" + string_val: "bert/embeddings/position_embeddings/adam_v" + string_val: "bert/embeddings/token_type_embeddings" + string_val: "bert/embeddings/token_type_embeddings/adam_m" + string_val: "bert/embeddings/token_type_embeddings/adam_v" + string_val: "bert/embeddings/word_embeddings" + string_val: "bert/embeddings/word_embeddings/adam_m" + string_val: "bert/embeddings/word_embeddings/adam_v" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_0/attention/output/dense/bias" + string_val: "bert/encoder/layer_0/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_0/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_0/attention/output/dense/kernel" + string_val: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_0/attention/self/key/bias" + string_val: "bert/encoder/layer_0/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_0/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_0/attention/self/key/kernel" + string_val: "bert/encoder/layer_0/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_0/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_0/attention/self/query/bias" + string_val: "bert/encoder/layer_0/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_0/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_0/attention/self/query/kernel" + string_val: "bert/encoder/layer_0/attention/self/query/kernel/adam_m" + string_val: 
"bert/encoder/layer_0/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_0/attention/self/value/bias" + string_val: "bert/encoder/layer_0/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_0/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_0/attention/self/value/kernel" + string_val: "bert/encoder/layer_0/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_0/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_0/intermediate/dense/bias" + string_val: "bert/encoder/layer_0/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_0/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_0/intermediate/dense/kernel" + string_val: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_0/output/LayerNorm/beta" + string_val: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_0/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_0/output/dense/bias" + string_val: "bert/encoder/layer_0/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_0/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_0/output/dense/kernel" + string_val: "bert/encoder/layer_0/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_0/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_1/attention/output/dense/bias" + string_val: "bert/encoder/layer_1/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_1/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_1/attention/output/dense/kernel" + string_val: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_1/attention/self/key/bias" + string_val: "bert/encoder/layer_1/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_1/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_1/attention/self/key/kernel" + string_val: "bert/encoder/layer_1/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_1/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_1/attention/self/query/bias" + string_val: "bert/encoder/layer_1/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_1/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_1/attention/self/query/kernel" + string_val: "bert/encoder/layer_1/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_1/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_1/attention/self/value/bias" + string_val: "bert/encoder/layer_1/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_1/attention/self/value/bias/adam_v" + string_val: 
"bert/encoder/layer_1/attention/self/value/kernel" + string_val: "bert/encoder/layer_1/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_1/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_1/intermediate/dense/bias" + string_val: "bert/encoder/layer_1/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_1/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_1/intermediate/dense/kernel" + string_val: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_1/output/LayerNorm/beta" + string_val: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_1/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_1/output/dense/bias" + string_val: "bert/encoder/layer_1/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_1/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_1/output/dense/kernel" + string_val: "bert/encoder/layer_1/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_1/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_10/attention/output/dense/bias" + string_val: "bert/encoder/layer_10/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_10/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_10/attention/output/dense/kernel" + string_val: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_10/attention/self/key/bias" + string_val: "bert/encoder/layer_10/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_10/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_10/attention/self/key/kernel" + string_val: "bert/encoder/layer_10/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_10/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_10/attention/self/query/bias" + string_val: "bert/encoder/layer_10/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_10/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_10/attention/self/query/kernel" + string_val: "bert/encoder/layer_10/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_10/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_10/attention/self/value/bias" + string_val: "bert/encoder/layer_10/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_10/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_10/attention/self/value/kernel" + string_val: "bert/encoder/layer_10/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_10/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_10/intermediate/dense/bias" + string_val: 
"bert/encoder/layer_10/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_10/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_10/intermediate/dense/kernel" + string_val: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_10/output/LayerNorm/beta" + string_val: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_10/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_10/output/dense/bias" + string_val: "bert/encoder/layer_10/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_10/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_10/output/dense/kernel" + string_val: "bert/encoder/layer_10/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_10/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_11/attention/output/dense/bias" + string_val: "bert/encoder/layer_11/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_11/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_11/attention/output/dense/kernel" + string_val: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_11/attention/self/key/bias" + string_val: "bert/encoder/layer_11/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_11/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_11/attention/self/key/kernel" + string_val: "bert/encoder/layer_11/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_11/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_11/attention/self/query/bias" + string_val: "bert/encoder/layer_11/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_11/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_11/attention/self/query/kernel" + string_val: "bert/encoder/layer_11/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_11/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_11/attention/self/value/bias" + string_val: "bert/encoder/layer_11/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_11/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_11/attention/self/value/kernel" + string_val: "bert/encoder/layer_11/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_11/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_11/intermediate/dense/bias" + string_val: "bert/encoder/layer_11/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_11/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_11/intermediate/dense/kernel" + string_val: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + 
string_val: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_11/output/LayerNorm/beta" + string_val: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_11/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_11/output/dense/bias" + string_val: "bert/encoder/layer_11/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_11/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_11/output/dense/kernel" + string_val: "bert/encoder/layer_11/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_11/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_2/attention/output/dense/bias" + string_val: "bert/encoder/layer_2/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_2/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_2/attention/output/dense/kernel" + string_val: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_2/attention/self/key/bias" + string_val: "bert/encoder/layer_2/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_2/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_2/attention/self/key/kernel" + string_val: "bert/encoder/layer_2/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_2/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_2/attention/self/query/bias" + string_val: "bert/encoder/layer_2/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_2/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_2/attention/self/query/kernel" + string_val: "bert/encoder/layer_2/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_2/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_2/attention/self/value/bias" + string_val: "bert/encoder/layer_2/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_2/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_2/attention/self/value/kernel" + string_val: "bert/encoder/layer_2/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_2/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_2/intermediate/dense/bias" + string_val: "bert/encoder/layer_2/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_2/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_2/intermediate/dense/kernel" + string_val: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_2/output/LayerNorm/beta" + string_val: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + string_val: 
"bert/encoder/layer_2/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_2/output/dense/bias" + string_val: "bert/encoder/layer_2/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_2/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_2/output/dense/kernel" + string_val: "bert/encoder/layer_2/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_2/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_3/attention/output/dense/bias" + string_val: "bert/encoder/layer_3/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_3/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_3/attention/output/dense/kernel" + string_val: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_3/attention/self/key/bias" + string_val: "bert/encoder/layer_3/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_3/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_3/attention/self/key/kernel" + string_val: "bert/encoder/layer_3/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_3/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_3/attention/self/query/bias" + string_val: "bert/encoder/layer_3/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_3/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_3/attention/self/query/kernel" + string_val: "bert/encoder/layer_3/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_3/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_3/attention/self/value/bias" + string_val: "bert/encoder/layer_3/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_3/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_3/attention/self/value/kernel" + string_val: "bert/encoder/layer_3/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_3/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_3/intermediate/dense/bias" + string_val: "bert/encoder/layer_3/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_3/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_3/intermediate/dense/kernel" + string_val: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_3/output/LayerNorm/beta" + string_val: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_3/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_3/output/dense/bias" + string_val: "bert/encoder/layer_3/output/dense/bias/adam_m" + string_val: 
"bert/encoder/layer_3/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_3/output/dense/kernel" + string_val: "bert/encoder/layer_3/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_3/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_4/attention/output/dense/bias" + string_val: "bert/encoder/layer_4/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_4/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_4/attention/output/dense/kernel" + string_val: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_4/attention/self/key/bias" + string_val: "bert/encoder/layer_4/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_4/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_4/attention/self/key/kernel" + string_val: "bert/encoder/layer_4/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_4/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_4/attention/self/query/bias" + string_val: "bert/encoder/layer_4/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_4/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_4/attention/self/query/kernel" + string_val: "bert/encoder/layer_4/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_4/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_4/attention/self/value/bias" + string_val: "bert/encoder/layer_4/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_4/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_4/attention/self/value/kernel" + string_val: "bert/encoder/layer_4/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_4/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_4/intermediate/dense/bias" + string_val: "bert/encoder/layer_4/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_4/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_4/intermediate/dense/kernel" + string_val: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_4/output/LayerNorm/beta" + string_val: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_4/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_4/output/dense/bias" + string_val: "bert/encoder/layer_4/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_4/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_4/output/dense/kernel" + string_val: "bert/encoder/layer_4/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_4/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/beta" + 
string_val: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_5/attention/output/dense/bias" + string_val: "bert/encoder/layer_5/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_5/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_5/attention/output/dense/kernel" + string_val: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_5/attention/self/key/bias" + string_val: "bert/encoder/layer_5/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_5/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_5/attention/self/key/kernel" + string_val: "bert/encoder/layer_5/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_5/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_5/attention/self/query/bias" + string_val: "bert/encoder/layer_5/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_5/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_5/attention/self/query/kernel" + string_val: "bert/encoder/layer_5/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_5/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_5/attention/self/value/bias" + string_val: "bert/encoder/layer_5/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_5/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_5/attention/self/value/kernel" + string_val: "bert/encoder/layer_5/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_5/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_5/intermediate/dense/bias" + string_val: "bert/encoder/layer_5/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_5/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_5/intermediate/dense/kernel" + string_val: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_5/output/LayerNorm/beta" + string_val: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_5/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_5/output/dense/bias" + string_val: "bert/encoder/layer_5/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_5/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_5/output/dense/kernel" + string_val: "bert/encoder/layer_5/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_5/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + string_val: 
"bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_6/attention/output/dense/bias" + string_val: "bert/encoder/layer_6/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_6/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_6/attention/output/dense/kernel" + string_val: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_6/attention/self/key/bias" + string_val: "bert/encoder/layer_6/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_6/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_6/attention/self/key/kernel" + string_val: "bert/encoder/layer_6/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_6/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_6/attention/self/query/bias" + string_val: "bert/encoder/layer_6/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_6/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_6/attention/self/query/kernel" + string_val: "bert/encoder/layer_6/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_6/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_6/attention/self/value/bias" + string_val: "bert/encoder/layer_6/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_6/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_6/attention/self/value/kernel" + string_val: "bert/encoder/layer_6/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_6/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_6/intermediate/dense/bias" + string_val: "bert/encoder/layer_6/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_6/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_6/intermediate/dense/kernel" + string_val: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_6/output/LayerNorm/beta" + string_val: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_6/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_6/output/dense/bias" + string_val: "bert/encoder/layer_6/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_6/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_6/output/dense/kernel" + string_val: "bert/encoder/layer_6/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_6/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_7/attention/output/dense/bias" + string_val: "bert/encoder/layer_7/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_7/attention/output/dense/bias/adam_v" + string_val: 
"bert/encoder/layer_7/attention/output/dense/kernel" + string_val: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_7/attention/self/key/bias" + string_val: "bert/encoder/layer_7/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_7/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_7/attention/self/key/kernel" + string_val: "bert/encoder/layer_7/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_7/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_7/attention/self/query/bias" + string_val: "bert/encoder/layer_7/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_7/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_7/attention/self/query/kernel" + string_val: "bert/encoder/layer_7/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_7/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_7/attention/self/value/bias" + string_val: "bert/encoder/layer_7/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_7/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_7/attention/self/value/kernel" + string_val: "bert/encoder/layer_7/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_7/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_7/intermediate/dense/bias" + string_val: "bert/encoder/layer_7/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_7/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_7/intermediate/dense/kernel" + string_val: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_7/output/LayerNorm/beta" + string_val: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_7/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_7/output/dense/bias" + string_val: "bert/encoder/layer_7/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_7/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_7/output/dense/kernel" + string_val: "bert/encoder/layer_7/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_7/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_8/attention/output/dense/bias" + string_val: "bert/encoder/layer_8/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_8/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_8/attention/output/dense/kernel" + string_val: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_8/attention/self/key/bias" + string_val: 
"bert/encoder/layer_8/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_8/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_8/attention/self/key/kernel" + string_val: "bert/encoder/layer_8/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_8/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_8/attention/self/query/bias" + string_val: "bert/encoder/layer_8/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_8/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_8/attention/self/query/kernel" + string_val: "bert/encoder/layer_8/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_8/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_8/attention/self/value/bias" + string_val: "bert/encoder/layer_8/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_8/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_8/attention/self/value/kernel" + string_val: "bert/encoder/layer_8/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_8/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_8/intermediate/dense/bias" + string_val: "bert/encoder/layer_8/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_8/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_8/intermediate/dense/kernel" + string_val: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_8/output/LayerNorm/beta" + string_val: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_8/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_8/output/dense/bias" + string_val: "bert/encoder/layer_8/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_8/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_8/output/dense/kernel" + string_val: "bert/encoder/layer_8/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_8/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_9/attention/output/dense/bias" + string_val: "bert/encoder/layer_9/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_9/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_9/attention/output/dense/kernel" + string_val: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_9/attention/self/key/bias" + string_val: "bert/encoder/layer_9/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_9/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_9/attention/self/key/kernel" + string_val: "bert/encoder/layer_9/attention/self/key/kernel/adam_m" + string_val: 
"bert/encoder/layer_9/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_9/attention/self/query/bias" + string_val: "bert/encoder/layer_9/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_9/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_9/attention/self/query/kernel" + string_val: "bert/encoder/layer_9/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_9/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_9/attention/self/value/bias" + string_val: "bert/encoder/layer_9/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_9/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_9/attention/self/value/kernel" + string_val: "bert/encoder/layer_9/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_9/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_9/intermediate/dense/bias" + string_val: "bert/encoder/layer_9/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_9/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_9/intermediate/dense/kernel" + string_val: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_9/output/LayerNorm/beta" + string_val: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_9/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_9/output/dense/bias" + string_val: "bert/encoder/layer_9/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_9/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_9/output/dense/kernel" + string_val: "bert/encoder/layer_9/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_9/output/dense/kernel/adam_v" + string_val: "bert/pooler/dense/bias" + string_val: "bert/pooler/dense/bias/adam_m" + string_val: "bert/pooler/dense/bias/adam_v" + string_val: "bert/pooler/dense/kernel" + string_val: "bert/pooler/dense/kernel/adam_m" + string_val: "bert/pooler/dense/kernel/adam_v" + string_val: "global_step" + string_val: "output_bias" + string_val: "output_bias/adam_m" + string_val: "output_bias/adam_v" + string_val: "output_weights" + string_val: "output_weights/adam_m" + string_val: "output_weights/adam_v" + } + } + } +} +node { + name: "save/SaveV2/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 604 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 604 + } + } + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + 
string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + 
string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + 
string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + } + } + } +} +node { + name: "save/SaveV2" + op: "SaveV2" + input: "save/ShardedFilename" + input: "save/SaveV2/tensor_names" + input: "save/SaveV2/shape_and_slices" + input: "bert/embeddings/LayerNorm/beta" + input: "bert/embeddings/LayerNorm/beta/adam_m" + input: "bert/embeddings/LayerNorm/beta/adam_v" + input: "bert/embeddings/LayerNorm/gamma" + input: "bert/embeddings/LayerNorm/gamma/adam_m" + input: "bert/embeddings/LayerNorm/gamma/adam_v" + input: "bert/embeddings/position_embeddings" + input: "bert/embeddings/position_embeddings/adam_m" + input: "bert/embeddings/position_embeddings/adam_v" + input: "bert/embeddings/token_type_embeddings" + input: "bert/embeddings/token_type_embeddings/adam_m" + input: "bert/embeddings/token_type_embeddings/adam_v" + input: "bert/embeddings/word_embeddings" + input: "bert/embeddings/word_embeddings/adam_m" + input: "bert/embeddings/word_embeddings/adam_v" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta" + input: 
"bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_0/attention/output/dense/bias" + input: "bert/encoder/layer_0/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_0/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_0/attention/output/dense/kernel" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_0/attention/self/key/bias" + input: "bert/encoder/layer_0/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_0/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_0/attention/self/key/kernel" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_0/attention/self/query/bias" + input: "bert/encoder/layer_0/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_0/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_0/attention/self/query/kernel" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_0/attention/self/value/bias" + input: "bert/encoder/layer_0/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_0/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_0/attention/self/value/kernel" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_0/intermediate/dense/bias" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_0/intermediate/dense/kernel" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_0/output/LayerNorm/beta" + input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_0/output/LayerNorm/gamma" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_0/output/dense/bias" + input: "bert/encoder/layer_0/output/dense/bias/adam_m" + input: "bert/encoder/layer_0/output/dense/bias/adam_v" + input: "bert/encoder/layer_0/output/dense/kernel" + input: "bert/encoder/layer_0/output/dense/kernel/adam_m" + input: "bert/encoder/layer_0/output/dense/kernel/adam_v" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_1/attention/output/dense/bias" + input: "bert/encoder/layer_1/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_1/attention/output/dense/bias/adam_v" + input: 
"bert/encoder/layer_1/attention/output/dense/kernel" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_1/attention/self/key/bias" + input: "bert/encoder/layer_1/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_1/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_1/attention/self/key/kernel" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_1/attention/self/query/bias" + input: "bert/encoder/layer_1/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_1/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_1/attention/self/query/kernel" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_1/attention/self/value/bias" + input: "bert/encoder/layer_1/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_1/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_1/attention/self/value/kernel" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_1/intermediate/dense/bias" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_1/intermediate/dense/kernel" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_1/output/LayerNorm/beta" + input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_1/output/LayerNorm/gamma" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_1/output/dense/bias" + input: "bert/encoder/layer_1/output/dense/bias/adam_m" + input: "bert/encoder/layer_1/output/dense/bias/adam_v" + input: "bert/encoder/layer_1/output/dense/kernel" + input: "bert/encoder/layer_1/output/dense/kernel/adam_m" + input: "bert/encoder/layer_1/output/dense/kernel/adam_v" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_10/attention/output/dense/bias" + input: "bert/encoder/layer_10/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_10/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_10/attention/output/dense/kernel" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_10/attention/self/key/bias" + input: "bert/encoder/layer_10/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_10/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_10/attention/self/key/kernel" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_m" + input: 
"bert/encoder/layer_10/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_10/attention/self/query/bias" + input: "bert/encoder/layer_10/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_10/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_10/attention/self/query/kernel" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_10/attention/self/value/bias" + input: "bert/encoder/layer_10/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_10/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_10/attention/self/value/kernel" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_10/intermediate/dense/bias" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_10/intermediate/dense/kernel" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_10/output/LayerNorm/beta" + input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_10/output/LayerNorm/gamma" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_10/output/dense/bias" + input: "bert/encoder/layer_10/output/dense/bias/adam_m" + input: "bert/encoder/layer_10/output/dense/bias/adam_v" + input: "bert/encoder/layer_10/output/dense/kernel" + input: "bert/encoder/layer_10/output/dense/kernel/adam_m" + input: "bert/encoder/layer_10/output/dense/kernel/adam_v" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_11/attention/output/dense/bias" + input: "bert/encoder/layer_11/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_11/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_11/attention/output/dense/kernel" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_11/attention/self/key/bias" + input: "bert/encoder/layer_11/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_11/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_11/attention/self/key/kernel" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_11/attention/self/query/bias" + input: "bert/encoder/layer_11/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_11/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_11/attention/self/query/kernel" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_11/attention/self/value/bias" 
+ input: "bert/encoder/layer_11/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_11/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_11/attention/self/value/kernel" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_11/intermediate/dense/bias" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_11/intermediate/dense/kernel" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_11/output/LayerNorm/beta" + input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_11/output/LayerNorm/gamma" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_11/output/dense/bias" + input: "bert/encoder/layer_11/output/dense/bias/adam_m" + input: "bert/encoder/layer_11/output/dense/bias/adam_v" + input: "bert/encoder/layer_11/output/dense/kernel" + input: "bert/encoder/layer_11/output/dense/kernel/adam_m" + input: "bert/encoder/layer_11/output/dense/kernel/adam_v" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_2/attention/output/dense/bias" + input: "bert/encoder/layer_2/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_2/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_2/attention/output/dense/kernel" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_2/attention/self/key/bias" + input: "bert/encoder/layer_2/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_2/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_2/attention/self/key/kernel" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_2/attention/self/query/bias" + input: "bert/encoder/layer_2/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_2/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_2/attention/self/query/kernel" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_2/attention/self/value/bias" + input: "bert/encoder/layer_2/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_2/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_2/attention/self/value/kernel" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_2/intermediate/dense/bias" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_v" + input: 
"bert/encoder/layer_2/intermediate/dense/kernel" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_2/output/LayerNorm/beta" + input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_2/output/LayerNorm/gamma" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_2/output/dense/bias" + input: "bert/encoder/layer_2/output/dense/bias/adam_m" + input: "bert/encoder/layer_2/output/dense/bias/adam_v" + input: "bert/encoder/layer_2/output/dense/kernel" + input: "bert/encoder/layer_2/output/dense/kernel/adam_m" + input: "bert/encoder/layer_2/output/dense/kernel/adam_v" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_3/attention/output/dense/bias" + input: "bert/encoder/layer_3/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_3/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_3/attention/output/dense/kernel" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_3/attention/self/key/bias" + input: "bert/encoder/layer_3/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_3/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_3/attention/self/key/kernel" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_3/attention/self/query/bias" + input: "bert/encoder/layer_3/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_3/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_3/attention/self/query/kernel" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_3/attention/self/value/bias" + input: "bert/encoder/layer_3/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_3/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_3/attention/self/value/kernel" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_3/intermediate/dense/bias" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_3/intermediate/dense/kernel" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_3/output/LayerNorm/beta" + input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_3/output/LayerNorm/gamma" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + input: 
"bert/encoder/layer_3/output/dense/bias" + input: "bert/encoder/layer_3/output/dense/bias/adam_m" + input: "bert/encoder/layer_3/output/dense/bias/adam_v" + input: "bert/encoder/layer_3/output/dense/kernel" + input: "bert/encoder/layer_3/output/dense/kernel/adam_m" + input: "bert/encoder/layer_3/output/dense/kernel/adam_v" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_4/attention/output/dense/bias" + input: "bert/encoder/layer_4/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_4/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_4/attention/output/dense/kernel" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_4/attention/self/key/bias" + input: "bert/encoder/layer_4/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_4/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_4/attention/self/key/kernel" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_4/attention/self/query/bias" + input: "bert/encoder/layer_4/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_4/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_4/attention/self/query/kernel" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_4/attention/self/value/bias" + input: "bert/encoder/layer_4/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_4/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_4/attention/self/value/kernel" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_4/intermediate/dense/bias" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_4/intermediate/dense/kernel" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_4/output/LayerNorm/beta" + input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_4/output/LayerNorm/gamma" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_4/output/dense/bias" + input: "bert/encoder/layer_4/output/dense/bias/adam_m" + input: "bert/encoder/layer_4/output/dense/bias/adam_v" + input: "bert/encoder/layer_4/output/dense/kernel" + input: "bert/encoder/layer_4/output/dense/kernel/adam_m" + input: "bert/encoder/layer_4/output/dense/kernel/adam_v" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + input: 
"bert/encoder/layer_5/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_5/attention/output/dense/bias" + input: "bert/encoder/layer_5/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_5/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_5/attention/output/dense/kernel" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_5/attention/self/key/bias" + input: "bert/encoder/layer_5/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_5/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_5/attention/self/key/kernel" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_5/attention/self/query/bias" + input: "bert/encoder/layer_5/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_5/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_5/attention/self/query/kernel" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_5/attention/self/value/bias" + input: "bert/encoder/layer_5/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_5/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_5/attention/self/value/kernel" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_5/intermediate/dense/bias" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_5/intermediate/dense/kernel" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_5/output/LayerNorm/beta" + input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_5/output/LayerNorm/gamma" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_5/output/dense/bias" + input: "bert/encoder/layer_5/output/dense/bias/adam_m" + input: "bert/encoder/layer_5/output/dense/bias/adam_v" + input: "bert/encoder/layer_5/output/dense/kernel" + input: "bert/encoder/layer_5/output/dense/kernel/adam_m" + input: "bert/encoder/layer_5/output/dense/kernel/adam_v" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_6/attention/output/dense/bias" + input: "bert/encoder/layer_6/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_6/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_6/attention/output/dense/kernel" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + input: 
"bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_6/attention/self/key/bias" + input: "bert/encoder/layer_6/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_6/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_6/attention/self/key/kernel" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_6/attention/self/query/bias" + input: "bert/encoder/layer_6/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_6/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_6/attention/self/query/kernel" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_6/attention/self/value/bias" + input: "bert/encoder/layer_6/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_6/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_6/attention/self/value/kernel" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_6/intermediate/dense/bias" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_6/intermediate/dense/kernel" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_6/output/LayerNorm/beta" + input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_6/output/LayerNorm/gamma" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_6/output/dense/bias" + input: "bert/encoder/layer_6/output/dense/bias/adam_m" + input: "bert/encoder/layer_6/output/dense/bias/adam_v" + input: "bert/encoder/layer_6/output/dense/kernel" + input: "bert/encoder/layer_6/output/dense/kernel/adam_m" + input: "bert/encoder/layer_6/output/dense/kernel/adam_v" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_7/attention/output/dense/bias" + input: "bert/encoder/layer_7/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_7/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_7/attention/output/dense/kernel" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_7/attention/self/key/bias" + input: "bert/encoder/layer_7/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_7/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_7/attention/self/key/kernel" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_7/attention/self/query/bias" + input: "bert/encoder/layer_7/attention/self/query/bias/adam_m" 
+ input: "bert/encoder/layer_7/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_7/attention/self/query/kernel" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_7/attention/self/value/bias" + input: "bert/encoder/layer_7/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_7/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_7/attention/self/value/kernel" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_7/intermediate/dense/bias" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_7/intermediate/dense/kernel" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_7/output/LayerNorm/beta" + input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_7/output/LayerNorm/gamma" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_7/output/dense/bias" + input: "bert/encoder/layer_7/output/dense/bias/adam_m" + input: "bert/encoder/layer_7/output/dense/bias/adam_v" + input: "bert/encoder/layer_7/output/dense/kernel" + input: "bert/encoder/layer_7/output/dense/kernel/adam_m" + input: "bert/encoder/layer_7/output/dense/kernel/adam_v" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_8/attention/output/dense/bias" + input: "bert/encoder/layer_8/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_8/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_8/attention/output/dense/kernel" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_8/attention/self/key/bias" + input: "bert/encoder/layer_8/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_8/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_8/attention/self/key/kernel" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_8/attention/self/query/bias" + input: "bert/encoder/layer_8/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_8/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_8/attention/self/query/kernel" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_8/attention/self/value/bias" + input: "bert/encoder/layer_8/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_8/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_8/attention/self/value/kernel" + input: 
"bert/encoder/layer_8/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_8/intermediate/dense/bias" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_8/intermediate/dense/kernel" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_8/output/LayerNorm/beta" + input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_8/output/LayerNorm/gamma" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_8/output/dense/bias" + input: "bert/encoder/layer_8/output/dense/bias/adam_m" + input: "bert/encoder/layer_8/output/dense/bias/adam_v" + input: "bert/encoder/layer_8/output/dense/kernel" + input: "bert/encoder/layer_8/output/dense/kernel/adam_m" + input: "bert/encoder/layer_8/output/dense/kernel/adam_v" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_9/attention/output/dense/bias" + input: "bert/encoder/layer_9/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_9/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_9/attention/output/dense/kernel" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_9/attention/self/key/bias" + input: "bert/encoder/layer_9/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_9/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_9/attention/self/key/kernel" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_9/attention/self/query/bias" + input: "bert/encoder/layer_9/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_9/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_9/attention/self/query/kernel" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_9/attention/self/value/bias" + input: "bert/encoder/layer_9/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_9/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_9/attention/self/value/kernel" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_9/intermediate/dense/bias" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_9/intermediate/dense/kernel" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_9/output/LayerNorm/beta" + 
input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_9/output/LayerNorm/gamma" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_9/output/dense/bias" + input: "bert/encoder/layer_9/output/dense/bias/adam_m" + input: "bert/encoder/layer_9/output/dense/bias/adam_v" + input: "bert/encoder/layer_9/output/dense/kernel" + input: "bert/encoder/layer_9/output/dense/kernel/adam_m" + input: "bert/encoder/layer_9/output/dense/kernel/adam_v" + input: "bert/pooler/dense/bias" + input: "bert/pooler/dense/bias/adam_m" + input: "bert/pooler/dense/bias/adam_v" + input: "bert/pooler/dense/kernel" + input: "bert/pooler/dense/kernel/adam_m" + input: "bert/pooler/dense/kernel/adam_v" + input: "global_step/Read/ReadVariableOp" + input: "output_bias" + input: "output_bias/adam_m" + input: "output_bias/adam_v" + input: "output_weights" + input: "output_weights/adam_m" + input: "output_weights/adam_v" + device: "/device:CPU:0" + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: 
DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: 
DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: 
DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_INT64 + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + } + } + } +} +node { + name: "save/control_dependency" + op: "Identity" + input: "save/ShardedFilename" + input: "^save/SaveV2" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_class" + value { + list { + s: "loc:@save/ShardedFilename" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "save/MergeV2Checkpoints/checkpoint_prefixes" + op: "Pack" + input: "save/ShardedFilename" + input: "^save/control_dependency" + device: "/device:CPU:0" + attr { + key: "N" + value { + i: 1 + } + } + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "axis" + value { + i: 0 + } + } +} +node { + name: "save/MergeV2Checkpoints" + op: "MergeV2Checkpoints" + input: "save/MergeV2Checkpoints/checkpoint_prefixes" + input: "save/Const" + device: "/device:CPU:0" + attr { + key: "delete_old_dirs" + value { + b: true + } + } +} +node { + name: "save/Identity" + op: "Identity" + input: "save/Const" + input: "^save/MergeV2Checkpoints" + input: "^save/control_dependency" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "save/RestoreV2/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 604 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 604 + } + } + string_val: "bert/embeddings/LayerNorm/beta" + string_val: "bert/embeddings/LayerNorm/beta/adam_m" + string_val: "bert/embeddings/LayerNorm/beta/adam_v" + string_val: "bert/embeddings/LayerNorm/gamma" + string_val: "bert/embeddings/LayerNorm/gamma/adam_m" + string_val: "bert/embeddings/LayerNorm/gamma/adam_v" + string_val: "bert/embeddings/position_embeddings" + string_val: "bert/embeddings/position_embeddings/adam_m" + string_val: "bert/embeddings/position_embeddings/adam_v" + string_val: "bert/embeddings/token_type_embeddings" + string_val: "bert/embeddings/token_type_embeddings/adam_m" + string_val: "bert/embeddings/token_type_embeddings/adam_v" + string_val: "bert/embeddings/word_embeddings" + string_val: "bert/embeddings/word_embeddings/adam_m" + string_val: "bert/embeddings/word_embeddings/adam_v" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + string_val: 
"bert/encoder/layer_0/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_0/attention/output/dense/bias" + string_val: "bert/encoder/layer_0/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_0/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_0/attention/output/dense/kernel" + string_val: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_0/attention/self/key/bias" + string_val: "bert/encoder/layer_0/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_0/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_0/attention/self/key/kernel" + string_val: "bert/encoder/layer_0/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_0/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_0/attention/self/query/bias" + string_val: "bert/encoder/layer_0/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_0/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_0/attention/self/query/kernel" + string_val: "bert/encoder/layer_0/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_0/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_0/attention/self/value/bias" + string_val: "bert/encoder/layer_0/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_0/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_0/attention/self/value/kernel" + string_val: "bert/encoder/layer_0/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_0/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_0/intermediate/dense/bias" + string_val: "bert/encoder/layer_0/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_0/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_0/intermediate/dense/kernel" + string_val: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_0/output/LayerNorm/beta" + string_val: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_0/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_0/output/dense/bias" + string_val: "bert/encoder/layer_0/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_0/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_0/output/dense/kernel" + string_val: "bert/encoder/layer_0/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_0/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_1/attention/output/dense/bias" + string_val: 
"bert/encoder/layer_1/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_1/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_1/attention/output/dense/kernel" + string_val: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_1/attention/self/key/bias" + string_val: "bert/encoder/layer_1/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_1/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_1/attention/self/key/kernel" + string_val: "bert/encoder/layer_1/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_1/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_1/attention/self/query/bias" + string_val: "bert/encoder/layer_1/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_1/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_1/attention/self/query/kernel" + string_val: "bert/encoder/layer_1/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_1/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_1/attention/self/value/bias" + string_val: "bert/encoder/layer_1/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_1/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_1/attention/self/value/kernel" + string_val: "bert/encoder/layer_1/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_1/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_1/intermediate/dense/bias" + string_val: "bert/encoder/layer_1/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_1/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_1/intermediate/dense/kernel" + string_val: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_1/output/LayerNorm/beta" + string_val: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_1/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_1/output/dense/bias" + string_val: "bert/encoder/layer_1/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_1/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_1/output/dense/kernel" + string_val: "bert/encoder/layer_1/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_1/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_10/attention/output/dense/bias" + string_val: "bert/encoder/layer_10/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_10/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_10/attention/output/dense/kernel" + string_val: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + string_val: 
"bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_10/attention/self/key/bias" + string_val: "bert/encoder/layer_10/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_10/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_10/attention/self/key/kernel" + string_val: "bert/encoder/layer_10/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_10/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_10/attention/self/query/bias" + string_val: "bert/encoder/layer_10/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_10/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_10/attention/self/query/kernel" + string_val: "bert/encoder/layer_10/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_10/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_10/attention/self/value/bias" + string_val: "bert/encoder/layer_10/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_10/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_10/attention/self/value/kernel" + string_val: "bert/encoder/layer_10/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_10/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_10/intermediate/dense/bias" + string_val: "bert/encoder/layer_10/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_10/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_10/intermediate/dense/kernel" + string_val: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_10/output/LayerNorm/beta" + string_val: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_10/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_10/output/dense/bias" + string_val: "bert/encoder/layer_10/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_10/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_10/output/dense/kernel" + string_val: "bert/encoder/layer_10/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_10/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_11/attention/output/dense/bias" + string_val: "bert/encoder/layer_11/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_11/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_11/attention/output/dense/kernel" + string_val: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_11/attention/self/key/bias" + string_val: "bert/encoder/layer_11/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_11/attention/self/key/bias/adam_v" + 
string_val: "bert/encoder/layer_11/attention/self/key/kernel" + string_val: "bert/encoder/layer_11/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_11/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_11/attention/self/query/bias" + string_val: "bert/encoder/layer_11/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_11/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_11/attention/self/query/kernel" + string_val: "bert/encoder/layer_11/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_11/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_11/attention/self/value/bias" + string_val: "bert/encoder/layer_11/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_11/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_11/attention/self/value/kernel" + string_val: "bert/encoder/layer_11/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_11/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_11/intermediate/dense/bias" + string_val: "bert/encoder/layer_11/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_11/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_11/intermediate/dense/kernel" + string_val: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_11/output/LayerNorm/beta" + string_val: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_11/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_11/output/dense/bias" + string_val: "bert/encoder/layer_11/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_11/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_11/output/dense/kernel" + string_val: "bert/encoder/layer_11/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_11/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_2/attention/output/dense/bias" + string_val: "bert/encoder/layer_2/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_2/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_2/attention/output/dense/kernel" + string_val: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_2/attention/self/key/bias" + string_val: "bert/encoder/layer_2/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_2/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_2/attention/self/key/kernel" + string_val: "bert/encoder/layer_2/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_2/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_2/attention/self/query/bias" + string_val: 
"bert/encoder/layer_2/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_2/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_2/attention/self/query/kernel" + string_val: "bert/encoder/layer_2/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_2/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_2/attention/self/value/bias" + string_val: "bert/encoder/layer_2/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_2/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_2/attention/self/value/kernel" + string_val: "bert/encoder/layer_2/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_2/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_2/intermediate/dense/bias" + string_val: "bert/encoder/layer_2/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_2/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_2/intermediate/dense/kernel" + string_val: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_2/output/LayerNorm/beta" + string_val: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_2/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_2/output/dense/bias" + string_val: "bert/encoder/layer_2/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_2/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_2/output/dense/kernel" + string_val: "bert/encoder/layer_2/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_2/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_3/attention/output/dense/bias" + string_val: "bert/encoder/layer_3/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_3/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_3/attention/output/dense/kernel" + string_val: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_3/attention/self/key/bias" + string_val: "bert/encoder/layer_3/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_3/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_3/attention/self/key/kernel" + string_val: "bert/encoder/layer_3/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_3/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_3/attention/self/query/bias" + string_val: "bert/encoder/layer_3/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_3/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_3/attention/self/query/kernel" + string_val: "bert/encoder/layer_3/attention/self/query/kernel/adam_m" + string_val: 
"bert/encoder/layer_3/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_3/attention/self/value/bias" + string_val: "bert/encoder/layer_3/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_3/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_3/attention/self/value/kernel" + string_val: "bert/encoder/layer_3/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_3/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_3/intermediate/dense/bias" + string_val: "bert/encoder/layer_3/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_3/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_3/intermediate/dense/kernel" + string_val: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_3/output/LayerNorm/beta" + string_val: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_3/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_3/output/dense/bias" + string_val: "bert/encoder/layer_3/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_3/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_3/output/dense/kernel" + string_val: "bert/encoder/layer_3/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_3/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_4/attention/output/dense/bias" + string_val: "bert/encoder/layer_4/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_4/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_4/attention/output/dense/kernel" + string_val: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_4/attention/self/key/bias" + string_val: "bert/encoder/layer_4/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_4/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_4/attention/self/key/kernel" + string_val: "bert/encoder/layer_4/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_4/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_4/attention/self/query/bias" + string_val: "bert/encoder/layer_4/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_4/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_4/attention/self/query/kernel" + string_val: "bert/encoder/layer_4/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_4/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_4/attention/self/value/bias" + string_val: "bert/encoder/layer_4/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_4/attention/self/value/bias/adam_v" + string_val: 
"bert/encoder/layer_4/attention/self/value/kernel" + string_val: "bert/encoder/layer_4/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_4/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_4/intermediate/dense/bias" + string_val: "bert/encoder/layer_4/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_4/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_4/intermediate/dense/kernel" + string_val: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_4/output/LayerNorm/beta" + string_val: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_4/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_4/output/dense/bias" + string_val: "bert/encoder/layer_4/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_4/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_4/output/dense/kernel" + string_val: "bert/encoder/layer_4/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_4/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_5/attention/output/dense/bias" + string_val: "bert/encoder/layer_5/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_5/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_5/attention/output/dense/kernel" + string_val: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_5/attention/self/key/bias" + string_val: "bert/encoder/layer_5/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_5/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_5/attention/self/key/kernel" + string_val: "bert/encoder/layer_5/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_5/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_5/attention/self/query/bias" + string_val: "bert/encoder/layer_5/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_5/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_5/attention/self/query/kernel" + string_val: "bert/encoder/layer_5/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_5/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_5/attention/self/value/bias" + string_val: "bert/encoder/layer_5/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_5/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_5/attention/self/value/kernel" + string_val: "bert/encoder/layer_5/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_5/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_5/intermediate/dense/bias" + string_val: 
"bert/encoder/layer_5/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_5/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_5/intermediate/dense/kernel" + string_val: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_5/output/LayerNorm/beta" + string_val: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_5/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_5/output/dense/bias" + string_val: "bert/encoder/layer_5/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_5/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_5/output/dense/kernel" + string_val: "bert/encoder/layer_5/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_5/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_6/attention/output/dense/bias" + string_val: "bert/encoder/layer_6/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_6/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_6/attention/output/dense/kernel" + string_val: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_6/attention/self/key/bias" + string_val: "bert/encoder/layer_6/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_6/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_6/attention/self/key/kernel" + string_val: "bert/encoder/layer_6/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_6/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_6/attention/self/query/bias" + string_val: "bert/encoder/layer_6/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_6/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_6/attention/self/query/kernel" + string_val: "bert/encoder/layer_6/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_6/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_6/attention/self/value/bias" + string_val: "bert/encoder/layer_6/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_6/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_6/attention/self/value/kernel" + string_val: "bert/encoder/layer_6/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_6/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_6/intermediate/dense/bias" + string_val: "bert/encoder/layer_6/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_6/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_6/intermediate/dense/kernel" + string_val: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + string_val: 
"bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_6/output/LayerNorm/beta" + string_val: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_6/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_6/output/dense/bias" + string_val: "bert/encoder/layer_6/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_6/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_6/output/dense/kernel" + string_val: "bert/encoder/layer_6/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_6/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_7/attention/output/dense/bias" + string_val: "bert/encoder/layer_7/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_7/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_7/attention/output/dense/kernel" + string_val: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_7/attention/self/key/bias" + string_val: "bert/encoder/layer_7/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_7/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_7/attention/self/key/kernel" + string_val: "bert/encoder/layer_7/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_7/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_7/attention/self/query/bias" + string_val: "bert/encoder/layer_7/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_7/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_7/attention/self/query/kernel" + string_val: "bert/encoder/layer_7/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_7/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_7/attention/self/value/bias" + string_val: "bert/encoder/layer_7/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_7/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_7/attention/self/value/kernel" + string_val: "bert/encoder/layer_7/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_7/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_7/intermediate/dense/bias" + string_val: "bert/encoder/layer_7/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_7/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_7/intermediate/dense/kernel" + string_val: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_7/output/LayerNorm/beta" + string_val: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_7/output/LayerNorm/gamma" + 
string_val: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_7/output/dense/bias" + string_val: "bert/encoder/layer_7/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_7/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_7/output/dense/kernel" + string_val: "bert/encoder/layer_7/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_7/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_8/attention/output/dense/bias" + string_val: "bert/encoder/layer_8/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_8/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_8/attention/output/dense/kernel" + string_val: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_8/attention/self/key/bias" + string_val: "bert/encoder/layer_8/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_8/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_8/attention/self/key/kernel" + string_val: "bert/encoder/layer_8/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_8/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_8/attention/self/query/bias" + string_val: "bert/encoder/layer_8/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_8/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_8/attention/self/query/kernel" + string_val: "bert/encoder/layer_8/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_8/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_8/attention/self/value/bias" + string_val: "bert/encoder/layer_8/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_8/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_8/attention/self/value/kernel" + string_val: "bert/encoder/layer_8/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_8/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_8/intermediate/dense/bias" + string_val: "bert/encoder/layer_8/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_8/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_8/intermediate/dense/kernel" + string_val: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_8/output/LayerNorm/beta" + string_val: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_8/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_8/output/dense/bias" + string_val: "bert/encoder/layer_8/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_8/output/dense/bias/adam_v" + 
string_val: "bert/encoder/layer_8/output/dense/kernel" + string_val: "bert/encoder/layer_8/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_8/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_9/attention/output/dense/bias" + string_val: "bert/encoder/layer_9/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_9/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_9/attention/output/dense/kernel" + string_val: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_9/attention/self/key/bias" + string_val: "bert/encoder/layer_9/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_9/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_9/attention/self/key/kernel" + string_val: "bert/encoder/layer_9/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_9/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_9/attention/self/query/bias" + string_val: "bert/encoder/layer_9/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_9/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_9/attention/self/query/kernel" + string_val: "bert/encoder/layer_9/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_9/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_9/attention/self/value/bias" + string_val: "bert/encoder/layer_9/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_9/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_9/attention/self/value/kernel" + string_val: "bert/encoder/layer_9/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_9/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_9/intermediate/dense/bias" + string_val: "bert/encoder/layer_9/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_9/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_9/intermediate/dense/kernel" + string_val: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_9/output/LayerNorm/beta" + string_val: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_9/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_9/output/dense/bias" + string_val: "bert/encoder/layer_9/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_9/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_9/output/dense/kernel" + string_val: "bert/encoder/layer_9/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_9/output/dense/kernel/adam_v" + string_val: "bert/pooler/dense/bias" + string_val: "bert/pooler/dense/bias/adam_m" + string_val: 
"bert/pooler/dense/bias/adam_v" + string_val: "bert/pooler/dense/kernel" + string_val: "bert/pooler/dense/kernel/adam_m" + string_val: "bert/pooler/dense/kernel/adam_v" + string_val: "global_step" + string_val: "output_bias" + string_val: "output_bias/adam_m" + string_val: "output_bias/adam_v" + string_val: "output_weights" + string_val: "output_weights/adam_m" + string_val: "output_weights/adam_v" + } + } + } +} +node { + name: "save/RestoreV2/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 604 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 604 + } + } + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + 
string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + 
string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + 
string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + } + } + } +} +node { + name: "save/RestoreV2" + op: "RestoreV2" + input: "save/Const" + input: "save/RestoreV2/tensor_names" + input: "save/RestoreV2/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + 
unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true 
+ } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + 
unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true 
+ } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + 
unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true 
+ } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: 
DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: 
DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: 
DT_FLOAT + type: DT_INT64 + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + } + } + } +} +node { + name: "save/Assign" + op: "Assign" + input: "bert/embeddings/LayerNorm/beta" + input: "save/RestoreV2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_1" + op: "Assign" + input: "bert/embeddings/LayerNorm/beta/adam_m" + input: "save/RestoreV2:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_2" + op: "Assign" + input: "bert/embeddings/LayerNorm/beta/adam_v" + input: "save/RestoreV2:2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_3" + op: "Assign" + input: "bert/embeddings/LayerNorm/gamma" + input: "save/RestoreV2:3" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_4" + op: "Assign" + input: "bert/embeddings/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:4" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_5" + op: "Assign" + input: "bert/embeddings/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:5" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_6" + op: "Assign" + input: "bert/embeddings/position_embeddings" + input: "save/RestoreV2:6" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 
768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_7" + op: "Assign" + input: "bert/embeddings/position_embeddings/adam_m" + input: "save/RestoreV2:7" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_8" + op: "Assign" + input: "bert/embeddings/position_embeddings/adam_v" + input: "save/RestoreV2:8" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_9" + op: "Assign" + input: "bert/embeddings/token_type_embeddings" + input: "save/RestoreV2:9" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_10" + op: "Assign" + input: "bert/embeddings/token_type_embeddings/adam_m" + input: "save/RestoreV2:10" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_11" + op: "Assign" + input: "bert/embeddings/token_type_embeddings/adam_v" + input: "save/RestoreV2:11" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_12" + op: "Assign" + input: "bert/embeddings/word_embeddings" + input: "save/RestoreV2:12" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_13" + op: "Assign" + input: "bert/embeddings/word_embeddings/adam_m" + input: "save/RestoreV2:13" + attr { + key: "T" + value { + type: 
DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_14" + op: "Assign" + input: "bert/embeddings/word_embeddings/adam_v" + input: "save/RestoreV2:14" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 21128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_15" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta" + input: "save/RestoreV2:15" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_16" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:16" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_17" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:17" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_18" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma" + input: "save/RestoreV2:18" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_19" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:19" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + 
key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_20" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:20" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_21" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/bias" + input: "save/RestoreV2:21" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_22" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/bias/adam_m" + input: "save/RestoreV2:22" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_23" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/bias/adam_v" + input: "save/RestoreV2:23" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_24" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/kernel" + input: "save/RestoreV2:24" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_25" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + input: "save/RestoreV2:25" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_26" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + 
input: "save/RestoreV2:26" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_27" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/bias" + input: "save/RestoreV2:27" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_28" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/bias/adam_m" + input: "save/RestoreV2:28" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_29" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/bias/adam_v" + input: "save/RestoreV2:29" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_30" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/kernel" + input: "save/RestoreV2:30" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_31" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_m" + input: "save/RestoreV2:31" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_32" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_v" + input: "save/RestoreV2:32" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + 
shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_33" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/bias" + input: "save/RestoreV2:33" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_34" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/bias/adam_m" + input: "save/RestoreV2:34" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_35" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/bias/adam_v" + input: "save/RestoreV2:35" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_36" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/kernel" + input: "save/RestoreV2:36" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_37" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_m" + input: "save/RestoreV2:37" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_38" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_v" + input: "save/RestoreV2:38" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_39" + op: 
"Assign" + input: "bert/encoder/layer_0/attention/self/value/bias" + input: "save/RestoreV2:39" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_40" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/value/bias/adam_m" + input: "save/RestoreV2:40" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_41" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/value/bias/adam_v" + input: "save/RestoreV2:41" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_42" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/value/kernel" + input: "save/RestoreV2:42" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_43" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_m" + input: "save/RestoreV2:43" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_44" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_v" + input: "save/RestoreV2:44" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_45" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/bias" + input: "save/RestoreV2:45" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_0/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_46" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_m" + input: "save/RestoreV2:46" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_47" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_v" + input: "save/RestoreV2:47" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_48" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/kernel" + input: "save/RestoreV2:48" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_49" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + input: "save/RestoreV2:49" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_50" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + input: "save/RestoreV2:50" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_51" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/beta" + input: "save/RestoreV2:51" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + 
value { + b: true + } + } +} +node { + name: "save/Assign_52" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:52" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_53" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:53" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_54" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/gamma" + input: "save/RestoreV2:54" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_55" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:55" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_56" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:56" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_57" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/bias" + input: "save/RestoreV2:57" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_58" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/bias/adam_m" + input: "save/RestoreV2:58" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { 
+ list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_59" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/bias/adam_v" + input: "save/RestoreV2:59" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_60" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/kernel" + input: "save/RestoreV2:60" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_61" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/kernel/adam_m" + input: "save/RestoreV2:61" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_62" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/kernel/adam_v" + input: "save/RestoreV2:62" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_63" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta" + input: "save/RestoreV2:63" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_64" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:64" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_65" + op: "Assign" + input: 
"bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:65" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_66" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma" + input: "save/RestoreV2:66" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_67" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:67" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_68" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:68" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_69" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/bias" + input: "save/RestoreV2:69" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_70" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/bias/adam_m" + input: "save/RestoreV2:70" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_71" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/bias/adam_v" + input: "save/RestoreV2:71" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_v" + } + } 
+ } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_72" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/kernel" + input: "save/RestoreV2:72" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_73" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + input: "save/RestoreV2:73" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_74" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + input: "save/RestoreV2:74" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_75" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/bias" + input: "save/RestoreV2:75" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_76" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/bias/adam_m" + input: "save/RestoreV2:76" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_77" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/bias/adam_v" + input: "save/RestoreV2:77" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + 
name: "save/Assign_78" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/kernel" + input: "save/RestoreV2:78" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_79" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_m" + input: "save/RestoreV2:79" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_80" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_v" + input: "save/RestoreV2:80" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_81" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/bias" + input: "save/RestoreV2:81" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_82" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/bias/adam_m" + input: "save/RestoreV2:82" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_83" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/bias/adam_v" + input: "save/RestoreV2:83" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_84" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/kernel" + input: "save/RestoreV2:84" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_1/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_85" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_m" + input: "save/RestoreV2:85" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_86" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_v" + input: "save/RestoreV2:86" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_87" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/bias" + input: "save/RestoreV2:87" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_88" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/bias/adam_m" + input: "save/RestoreV2:88" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_89" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/bias/adam_v" + input: "save/RestoreV2:89" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_90" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/kernel" + input: "save/RestoreV2:90" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: 
true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_91" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_m" + input: "save/RestoreV2:91" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_92" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_v" + input: "save/RestoreV2:92" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_93" + op: "Assign" + input: "bert/encoder/layer_1/intermediate/dense/bias" + input: "save/RestoreV2:93" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_94" + op: "Assign" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_m" + input: "save/RestoreV2:94" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_95" + op: "Assign" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_v" + input: "save/RestoreV2:95" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_96" + op: "Assign" + input: "bert/encoder/layer_1/intermediate/dense/kernel" + input: "save/RestoreV2:96" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_97" + op: "Assign" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + input: "save/RestoreV2:97" + attr { + key: "T" + value { + 
type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_98" + op: "Assign" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + input: "save/RestoreV2:98" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_99" + op: "Assign" + input: "bert/encoder/layer_1/output/LayerNorm/beta" + input: "save/RestoreV2:99" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_100" + op: "Assign" + input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:100" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_101" + op: "Assign" + input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:101" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_102" + op: "Assign" + input: "bert/encoder/layer_1/output/LayerNorm/gamma" + input: "save/RestoreV2:102" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_103" + op: "Assign" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:103" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + 
} + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_104" + op: "Assign" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:104" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_105" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/bias" + input: "save/RestoreV2:105" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_106" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/bias/adam_m" + input: "save/RestoreV2:106" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_107" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/bias/adam_v" + input: "save/RestoreV2:107" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_108" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/kernel" + input: "save/RestoreV2:108" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_109" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/kernel/adam_m" + input: "save/RestoreV2:109" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_110" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/kernel/adam_v" + input: "save/RestoreV2:110" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_1/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_111" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta" + input: "save/RestoreV2:111" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_112" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:112" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_113" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:113" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_114" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma" + input: "save/RestoreV2:114" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_115" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:115" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_116" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:116" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: 
"use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_117" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/bias" + input: "save/RestoreV2:117" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_118" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/bias/adam_m" + input: "save/RestoreV2:118" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_119" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/bias/adam_v" + input: "save/RestoreV2:119" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_120" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/kernel" + input: "save/RestoreV2:120" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_121" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + input: "save/RestoreV2:121" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_122" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + input: "save/RestoreV2:122" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_123" + op: "Assign" + input: 
"bert/encoder/layer_10/attention/self/key/bias" + input: "save/RestoreV2:123" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_124" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/key/bias/adam_m" + input: "save/RestoreV2:124" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_125" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/key/bias/adam_v" + input: "save/RestoreV2:125" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_126" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/key/kernel" + input: "save/RestoreV2:126" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_127" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_m" + input: "save/RestoreV2:127" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_128" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_v" + input: "save/RestoreV2:128" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_129" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/bias" + input: "save/RestoreV2:129" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias" + 
} + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_130" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/bias/adam_m" + input: "save/RestoreV2:130" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_131" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/bias/adam_v" + input: "save/RestoreV2:131" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_132" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/kernel" + input: "save/RestoreV2:132" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_133" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_m" + input: "save/RestoreV2:133" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_134" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_v" + input: "save/RestoreV2:134" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_135" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/bias" + input: "save/RestoreV2:135" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + 
b: true + } + } +} +node { + name: "save/Assign_136" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/bias/adam_m" + input: "save/RestoreV2:136" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_137" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/bias/adam_v" + input: "save/RestoreV2:137" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_138" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/kernel" + input: "save/RestoreV2:138" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_139" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_m" + input: "save/RestoreV2:139" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_140" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_v" + input: "save/RestoreV2:140" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_141" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/bias" + input: "save/RestoreV2:141" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_142" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_m" + input: "save/RestoreV2:142" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + 
attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_143" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_v" + input: "save/RestoreV2:143" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_144" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/kernel" + input: "save/RestoreV2:144" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_145" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + input: "save/RestoreV2:145" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_146" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + input: "save/RestoreV2:146" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_147" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/beta" + input: "save/RestoreV2:147" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_148" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:148" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: 
"use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_149" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:149" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_150" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/gamma" + input: "save/RestoreV2:150" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_151" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:151" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_152" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:152" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_153" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/bias" + input: "save/RestoreV2:153" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_154" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/bias/adam_m" + input: "save/RestoreV2:154" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_155" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/bias/adam_v" + input: "save/RestoreV2:155" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_10/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_156" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/kernel" + input: "save/RestoreV2:156" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_157" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/kernel/adam_m" + input: "save/RestoreV2:157" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_158" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/kernel/adam_v" + input: "save/RestoreV2:158" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_159" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta" + input: "save/RestoreV2:159" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_160" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:160" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_161" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:161" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } 
+ attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_162" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma" + input: "save/RestoreV2:162" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_163" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:163" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_164" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:164" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_165" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/dense/bias" + input: "save/RestoreV2:165" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_166" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/dense/bias/adam_m" + input: "save/RestoreV2:166" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_167" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/dense/bias/adam_v" + input: "save/RestoreV2:167" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_168" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/dense/kernel" + input: "save/RestoreV2:168" + attr { + key: "T" + value { + type: 
DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_169" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + input: "save/RestoreV2:169" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_170" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + input: "save/RestoreV2:170" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_171" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/bias" + input: "save/RestoreV2:171" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_172" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/bias/adam_m" + input: "save/RestoreV2:172" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_173" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/bias/adam_v" + input: "save/RestoreV2:173" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_174" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/kernel" + input: "save/RestoreV2:174" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + 
dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_175" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_m" + input: "save/RestoreV2:175" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_176" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_v" + input: "save/RestoreV2:176" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_177" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/query/bias" + input: "save/RestoreV2:177" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_178" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/query/bias/adam_m" + input: "save/RestoreV2:178" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_179" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/query/bias/adam_v" + input: "save/RestoreV2:179" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_180" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/query/kernel" + input: "save/RestoreV2:180" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_181" + op: "Assign" + input: 
"bert/encoder/layer_11/attention/self/query/kernel/adam_m" + input: "save/RestoreV2:181" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_182" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_v" + input: "save/RestoreV2:182" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_183" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/bias" + input: "save/RestoreV2:183" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_184" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/bias/adam_m" + input: "save/RestoreV2:184" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_185" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/bias/adam_v" + input: "save/RestoreV2:185" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_186" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/kernel" + input: "save/RestoreV2:186" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_187" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_m" + input: "save/RestoreV2:187" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_188" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_v" + input: "save/RestoreV2:188" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_189" + op: "Assign" + input: "bert/encoder/layer_11/intermediate/dense/bias" + input: "save/RestoreV2:189" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_190" + op: "Assign" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_m" + input: "save/RestoreV2:190" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_191" + op: "Assign" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_v" + input: "save/RestoreV2:191" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_192" + op: "Assign" + input: "bert/encoder/layer_11/intermediate/dense/kernel" + input: "save/RestoreV2:192" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_193" + op: "Assign" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + input: "save/RestoreV2:193" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" 
+ value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_194" + op: "Assign" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + input: "save/RestoreV2:194" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_195" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/beta" + input: "save/RestoreV2:195" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_196" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:196" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_197" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:197" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_198" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/gamma" + input: "save/RestoreV2:198" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_199" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:199" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_200" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:200" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: 
"_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_201" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/bias" + input: "save/RestoreV2:201" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_202" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/bias/adam_m" + input: "save/RestoreV2:202" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_203" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/bias/adam_v" + input: "save/RestoreV2:203" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_204" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/kernel" + input: "save/RestoreV2:204" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_205" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/kernel/adam_m" + input: "save/RestoreV2:205" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_206" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/kernel/adam_v" + input: "save/RestoreV2:206" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + 
b: true + } + } +} +node { + name: "save/Assign_207" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta" + input: "save/RestoreV2:207" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_208" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:208" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_209" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:209" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_210" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma" + input: "save/RestoreV2:210" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_211" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:211" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_212" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:212" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_213" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/bias" + input: "save/RestoreV2:213" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: 
"_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_214" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/bias/adam_m" + input: "save/RestoreV2:214" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_215" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/bias/adam_v" + input: "save/RestoreV2:215" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_216" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/kernel" + input: "save/RestoreV2:216" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_217" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + input: "save/RestoreV2:217" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_218" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + input: "save/RestoreV2:218" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_219" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/bias" + input: "save/RestoreV2:219" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { 
+ key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_220" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/bias/adam_m" + input: "save/RestoreV2:220" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_221" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/bias/adam_v" + input: "save/RestoreV2:221" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_222" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/kernel" + input: "save/RestoreV2:222" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_223" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_m" + input: "save/RestoreV2:223" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_224" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_v" + input: "save/RestoreV2:224" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_225" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/bias" + input: "save/RestoreV2:225" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_226" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/bias/adam_m" + input: 
"save/RestoreV2:226" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_227" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/bias/adam_v" + input: "save/RestoreV2:227" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_228" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/kernel" + input: "save/RestoreV2:228" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_229" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_m" + input: "save/RestoreV2:229" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_230" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_v" + input: "save/RestoreV2:230" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_231" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/bias" + input: "save/RestoreV2:231" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_232" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/bias/adam_m" + input: "save/RestoreV2:232" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_m" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_233" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/bias/adam_v" + input: "save/RestoreV2:233" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_234" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/kernel" + input: "save/RestoreV2:234" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_235" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_m" + input: "save/RestoreV2:235" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_236" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_v" + input: "save/RestoreV2:236" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_237" + op: "Assign" + input: "bert/encoder/layer_2/intermediate/dense/bias" + input: "save/RestoreV2:237" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_238" + op: "Assign" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_m" + input: "save/RestoreV2:238" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: 
"save/Assign_239" + op: "Assign" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_v" + input: "save/RestoreV2:239" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_240" + op: "Assign" + input: "bert/encoder/layer_2/intermediate/dense/kernel" + input: "save/RestoreV2:240" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_241" + op: "Assign" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + input: "save/RestoreV2:241" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_242" + op: "Assign" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + input: "save/RestoreV2:242" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_243" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/beta" + input: "save/RestoreV2:243" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_244" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:244" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_245" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:245" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_246" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/gamma" + input: "save/RestoreV2:246" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_247" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:247" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_248" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:248" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_249" + op: "Assign" + input: "bert/encoder/layer_2/output/dense/bias" + input: "save/RestoreV2:249" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_250" + op: "Assign" + input: "bert/encoder/layer_2/output/dense/bias/adam_m" + input: "save/RestoreV2:250" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_251" + op: "Assign" + input: "bert/encoder/layer_2/output/dense/bias/adam_v" + input: "save/RestoreV2:251" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_252" + op: "Assign" + input: 
"bert/encoder/layer_2/output/dense/kernel" + input: "save/RestoreV2:252" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_253" + op: "Assign" + input: "bert/encoder/layer_2/output/dense/kernel/adam_m" + input: "save/RestoreV2:253" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_254" + op: "Assign" + input: "bert/encoder/layer_2/output/dense/kernel/adam_v" + input: "save/RestoreV2:254" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_255" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta" + input: "save/RestoreV2:255" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_256" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:256" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_257" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:257" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_258" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma" + input: "save/RestoreV2:258" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_259" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:259" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_260" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:260" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_261" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/bias" + input: "save/RestoreV2:261" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_262" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/bias/adam_m" + input: "save/RestoreV2:262" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_263" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/bias/adam_v" + input: "save/RestoreV2:263" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_264" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/kernel" + input: "save/RestoreV2:264" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr 
{ + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_265" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + input: "save/RestoreV2:265" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_266" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + input: "save/RestoreV2:266" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_267" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/key/bias" + input: "save/RestoreV2:267" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_268" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/key/bias/adam_m" + input: "save/RestoreV2:268" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_269" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/key/bias/adam_v" + input: "save/RestoreV2:269" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_270" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/key/kernel" + input: "save/RestoreV2:270" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_271" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_m" + input: "save/RestoreV2:271" + attr { + key: "T" + value { + 
type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_272" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_v" + input: "save/RestoreV2:272" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_273" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/query/bias" + input: "save/RestoreV2:273" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_274" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/query/bias/adam_m" + input: "save/RestoreV2:274" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_275" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/query/bias/adam_v" + input: "save/RestoreV2:275" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_276" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/query/kernel" + input: "save/RestoreV2:276" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_277" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_m" + input: "save/RestoreV2:277" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + 
dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_278" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_v" + input: "save/RestoreV2:278" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_279" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/value/bias" + input: "save/RestoreV2:279" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_280" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/value/bias/adam_m" + input: "save/RestoreV2:280" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_281" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/value/bias/adam_v" + input: "save/RestoreV2:281" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_282" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/value/kernel" + input: "save/RestoreV2:282" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_283" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_m" + input: "save/RestoreV2:283" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_284" + op: "Assign" + input: 
"bert/encoder/layer_3/attention/self/value/kernel/adam_v" + input: "save/RestoreV2:284" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_285" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/bias" + input: "save/RestoreV2:285" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_286" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_m" + input: "save/RestoreV2:286" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_287" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_v" + input: "save/RestoreV2:287" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_288" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/kernel" + input: "save/RestoreV2:288" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_289" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + input: "save/RestoreV2:289" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_290" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + input: "save/RestoreV2:290" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_291" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/beta" + input: "save/RestoreV2:291" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_292" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:292" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_293" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:293" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_294" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/gamma" + input: "save/RestoreV2:294" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_295" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:295" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_296" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:296" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: 
"save/Assign_297" + op: "Assign" + input: "bert/encoder/layer_3/output/dense/bias" + input: "save/RestoreV2:297" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_298" + op: "Assign" + input: "bert/encoder/layer_3/output/dense/bias/adam_m" + input: "save/RestoreV2:298" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_299" + op: "Assign" + input: "bert/encoder/layer_3/output/dense/bias/adam_v" + input: "save/RestoreV2:299" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_300" + op: "Assign" + input: "bert/encoder/layer_3/output/dense/kernel" + input: "save/RestoreV2:300" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_301" + op: "Assign" + input: "bert/encoder/layer_3/output/dense/kernel/adam_m" + input: "save/RestoreV2:301" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_302" + op: "Assign" + input: "bert/encoder/layer_3/output/dense/kernel/adam_v" + input: "save/RestoreV2:302" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_303" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta" + input: "save/RestoreV2:303" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_304" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:304" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_305" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:305" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_306" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma" + input: "save/RestoreV2:306" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_307" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:307" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_308" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:308" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_309" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/bias" + input: "save/RestoreV2:309" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_310" 
+ op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/bias/adam_m" + input: "save/RestoreV2:310" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_311" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/bias/adam_v" + input: "save/RestoreV2:311" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_312" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/kernel" + input: "save/RestoreV2:312" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_313" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + input: "save/RestoreV2:313" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_314" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + input: "save/RestoreV2:314" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_315" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/bias" + input: "save/RestoreV2:315" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_316" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/bias/adam_m" + input: "save/RestoreV2:316" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_4/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_317" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/bias/adam_v" + input: "save/RestoreV2:317" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_318" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/kernel" + input: "save/RestoreV2:318" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_319" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_m" + input: "save/RestoreV2:319" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_320" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_v" + input: "save/RestoreV2:320" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_321" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/bias" + input: "save/RestoreV2:321" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_322" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/bias/adam_m" + input: "save/RestoreV2:322" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + 
key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_323" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/bias/adam_v" + input: "save/RestoreV2:323" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_324" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/kernel" + input: "save/RestoreV2:324" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_325" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_m" + input: "save/RestoreV2:325" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_326" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_v" + input: "save/RestoreV2:326" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_327" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/bias" + input: "save/RestoreV2:327" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_328" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/bias/adam_m" + input: "save/RestoreV2:328" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_329" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/bias/adam_v" + input: "save/RestoreV2:329" + attr { + key: "T" + value { + 
type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_330" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/kernel" + input: "save/RestoreV2:330" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_331" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_m" + input: "save/RestoreV2:331" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_332" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_v" + input: "save/RestoreV2:332" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_333" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/bias" + input: "save/RestoreV2:333" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_334" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_m" + input: "save/RestoreV2:334" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_335" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_v" + input: "save/RestoreV2:335" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + 
} + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_336" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/kernel" + input: "save/RestoreV2:336" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_337" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + input: "save/RestoreV2:337" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_338" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + input: "save/RestoreV2:338" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_339" + op: "Assign" + input: "bert/encoder/layer_4/output/LayerNorm/beta" + input: "save/RestoreV2:339" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_340" + op: "Assign" + input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:340" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_341" + op: "Assign" + input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:341" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_342" + op: "Assign" + input: "bert/encoder/layer_4/output/LayerNorm/gamma" + input: 
"save/RestoreV2:342" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_343" + op: "Assign" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:343" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_344" + op: "Assign" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:344" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_345" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/bias" + input: "save/RestoreV2:345" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_346" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/bias/adam_m" + input: "save/RestoreV2:346" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_347" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/bias/adam_v" + input: "save/RestoreV2:347" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_348" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/kernel" + input: "save/RestoreV2:348" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + 
key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_349" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/kernel/adam_m" + input: "save/RestoreV2:349" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_350" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/kernel/adam_v" + input: "save/RestoreV2:350" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_351" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta" + input: "save/RestoreV2:351" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_352" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:352" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_353" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:353" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_354" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma" + input: "save/RestoreV2:354" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_355" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:355" + attr { + key: "T" + value { + 
type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_356" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:356" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_357" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/bias" + input: "save/RestoreV2:357" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_358" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/bias/adam_m" + input: "save/RestoreV2:358" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_359" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/bias/adam_v" + input: "save/RestoreV2:359" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_360" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/kernel" + input: "save/RestoreV2:360" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_361" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + input: "save/RestoreV2:361" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { 
+ size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_362" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + input: "save/RestoreV2:362" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_363" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/key/bias" + input: "save/RestoreV2:363" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_364" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/key/bias/adam_m" + input: "save/RestoreV2:364" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_365" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/key/bias/adam_v" + input: "save/RestoreV2:365" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_366" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/key/kernel" + input: "save/RestoreV2:366" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_367" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_m" + input: "save/RestoreV2:367" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_368" + op: "Assign" + input: 
"bert/encoder/layer_5/attention/self/key/kernel/adam_v" + input: "save/RestoreV2:368" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_369" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/bias" + input: "save/RestoreV2:369" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_370" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/bias/adam_m" + input: "save/RestoreV2:370" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_371" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/bias/adam_v" + input: "save/RestoreV2:371" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_372" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/kernel" + input: "save/RestoreV2:372" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_373" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_m" + input: "save/RestoreV2:373" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_374" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_v" + input: "save/RestoreV2:374" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_375" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/bias" + input: "save/RestoreV2:375" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_376" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/bias/adam_m" + input: "save/RestoreV2:376" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_377" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/bias/adam_v" + input: "save/RestoreV2:377" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_378" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/kernel" + input: "save/RestoreV2:378" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_379" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_m" + input: "save/RestoreV2:379" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_380" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_v" + input: "save/RestoreV2:380" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: 
"use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_381" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/bias" + input: "save/RestoreV2:381" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_382" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_m" + input: "save/RestoreV2:382" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_383" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_v" + input: "save/RestoreV2:383" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_384" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/kernel" + input: "save/RestoreV2:384" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_385" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + input: "save/RestoreV2:385" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_386" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + input: "save/RestoreV2:386" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_387" + op: "Assign" + input: "bert/encoder/layer_5/output/LayerNorm/beta" + input: "save/RestoreV2:387" + attr 
{ + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_388" + op: "Assign" + input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:388" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_389" + op: "Assign" + input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:389" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_390" + op: "Assign" + input: "bert/encoder/layer_5/output/LayerNorm/gamma" + input: "save/RestoreV2:390" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_391" + op: "Assign" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:391" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_392" + op: "Assign" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:392" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_393" + op: "Assign" + input: "bert/encoder/layer_5/output/dense/bias" + input: "save/RestoreV2:393" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { 
+ b: true + } + } +} +node { + name: "save/Assign_394" + op: "Assign" + input: "bert/encoder/layer_5/output/dense/bias/adam_m" + input: "save/RestoreV2:394" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_395" + op: "Assign" + input: "bert/encoder/layer_5/output/dense/bias/adam_v" + input: "save/RestoreV2:395" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_396" + op: "Assign" + input: "bert/encoder/layer_5/output/dense/kernel" + input: "save/RestoreV2:396" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_397" + op: "Assign" + input: "bert/encoder/layer_5/output/dense/kernel/adam_m" + input: "save/RestoreV2:397" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_398" + op: "Assign" + input: "bert/encoder/layer_5/output/dense/kernel/adam_v" + input: "save/RestoreV2:398" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_399" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta" + input: "save/RestoreV2:399" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_400" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:400" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_401" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:401" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_402" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma" + input: "save/RestoreV2:402" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_403" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:403" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_404" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:404" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_405" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/bias" + input: "save/RestoreV2:405" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_406" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/bias/adam_m" + input: "save/RestoreV2:406" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + 
key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_407" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/bias/adam_v" + input: "save/RestoreV2:407" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_408" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/kernel" + input: "save/RestoreV2:408" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_409" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + input: "save/RestoreV2:409" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_410" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + input: "save/RestoreV2:410" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_411" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/bias" + input: "save/RestoreV2:411" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_412" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/bias/adam_m" + input: "save/RestoreV2:412" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_413" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/bias/adam_v" + input: "save/RestoreV2:413" + attr { + key: "T" + 
value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_414" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/kernel" + input: "save/RestoreV2:414" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_415" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_m" + input: "save/RestoreV2:415" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_416" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_v" + input: "save/RestoreV2:416" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_417" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/bias" + input: "save/RestoreV2:417" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_418" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/bias/adam_m" + input: "save/RestoreV2:418" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_419" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/bias/adam_v" + input: "save/RestoreV2:419" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + 
} + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_420" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/kernel" + input: "save/RestoreV2:420" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_421" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_m" + input: "save/RestoreV2:421" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_422" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_v" + input: "save/RestoreV2:422" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_423" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/value/bias" + input: "save/RestoreV2:423" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_424" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/value/bias/adam_m" + input: "save/RestoreV2:424" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_425" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/value/bias/adam_v" + input: "save/RestoreV2:425" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_426" + op: "Assign" + input: 
"bert/encoder/layer_6/attention/self/value/kernel" + input: "save/RestoreV2:426" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_427" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_m" + input: "save/RestoreV2:427" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_428" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_v" + input: "save/RestoreV2:428" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_429" + op: "Assign" + input: "bert/encoder/layer_6/intermediate/dense/bias" + input: "save/RestoreV2:429" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_430" + op: "Assign" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_m" + input: "save/RestoreV2:430" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_431" + op: "Assign" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_v" + input: "save/RestoreV2:431" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_432" + op: "Assign" + input: "bert/encoder/layer_6/intermediate/dense/kernel" + input: "save/RestoreV2:432" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel" 
+ } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_433" + op: "Assign" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + input: "save/RestoreV2:433" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_434" + op: "Assign" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + input: "save/RestoreV2:434" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_435" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/beta" + input: "save/RestoreV2:435" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_436" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:436" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_437" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:437" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_438" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/gamma" + input: "save/RestoreV2:438" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: 
"save/Assign_439" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:439" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_440" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:440" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_441" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/bias" + input: "save/RestoreV2:441" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_442" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/bias/adam_m" + input: "save/RestoreV2:442" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_443" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/bias/adam_v" + input: "save/RestoreV2:443" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_444" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/kernel" + input: "save/RestoreV2:444" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_445" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/kernel/adam_m" + input: "save/RestoreV2:445" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + 
dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_446" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/kernel/adam_v" + input: "save/RestoreV2:446" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_447" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta" + input: "save/RestoreV2:447" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_448" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:448" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_449" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:449" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_450" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma" + input: "save/RestoreV2:450" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_451" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:451" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_452" + op: "Assign" + 
input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:452" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_453" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/bias" + input: "save/RestoreV2:453" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_454" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/bias/adam_m" + input: "save/RestoreV2:454" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_455" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/bias/adam_v" + input: "save/RestoreV2:455" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_456" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/kernel" + input: "save/RestoreV2:456" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_457" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + input: "save/RestoreV2:457" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_458" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + input: "save/RestoreV2:458" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_459" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/bias" + input: "save/RestoreV2:459" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_460" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/bias/adam_m" + input: "save/RestoreV2:460" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_461" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/bias/adam_v" + input: "save/RestoreV2:461" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_462" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/kernel" + input: "save/RestoreV2:462" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_463" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_m" + input: "save/RestoreV2:463" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_464" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_v" + input: "save/RestoreV2:464" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true 
+ } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_465" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/bias" + input: "save/RestoreV2:465" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_466" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/bias/adam_m" + input: "save/RestoreV2:466" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_467" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/bias/adam_v" + input: "save/RestoreV2:467" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_468" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/kernel" + input: "save/RestoreV2:468" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_469" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_m" + input: "save/RestoreV2:469" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_470" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_v" + input: "save/RestoreV2:470" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_471" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/bias" + input: "save/RestoreV2:471" + attr { + key: "T" 
+ value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_472" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/bias/adam_m" + input: "save/RestoreV2:472" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_473" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/bias/adam_v" + input: "save/RestoreV2:473" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_474" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/kernel" + input: "save/RestoreV2:474" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_475" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_m" + input: "save/RestoreV2:475" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_476" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_v" + input: "save/RestoreV2:476" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_477" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/bias" + input: "save/RestoreV2:477" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 
+ } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_478" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_m" + input: "save/RestoreV2:478" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_479" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_v" + input: "save/RestoreV2:479" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_480" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/kernel" + input: "save/RestoreV2:480" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_481" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + input: "save/RestoreV2:481" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_482" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + input: "save/RestoreV2:482" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_483" + op: "Assign" + input: "bert/encoder/layer_7/output/LayerNorm/beta" + input: "save/RestoreV2:483" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_484" + op: "Assign" + input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m" 
+ input: "save/RestoreV2:484" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_485" + op: "Assign" + input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:485" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_486" + op: "Assign" + input: "bert/encoder/layer_7/output/LayerNorm/gamma" + input: "save/RestoreV2:486" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_487" + op: "Assign" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:487" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_488" + op: "Assign" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:488" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_489" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/bias" + input: "save/RestoreV2:489" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_490" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/bias/adam_m" + input: "save/RestoreV2:490" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr 
{ + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_491" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/bias/adam_v" + input: "save/RestoreV2:491" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_492" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/kernel" + input: "save/RestoreV2:492" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_493" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/kernel/adam_m" + input: "save/RestoreV2:493" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_494" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/kernel/adam_v" + input: "save/RestoreV2:494" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_495" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta" + input: "save/RestoreV2:495" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_496" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:496" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_497" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:497" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + 
attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_498" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma" + input: "save/RestoreV2:498" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_499" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:499" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_500" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:500" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_501" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/bias" + input: "save/RestoreV2:501" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_502" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/bias/adam_m" + input: "save/RestoreV2:502" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_503" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/bias/adam_v" + input: "save/RestoreV2:503" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" 
+ value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_504" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/kernel" + input: "save/RestoreV2:504" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_505" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + input: "save/RestoreV2:505" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_506" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + input: "save/RestoreV2:506" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_507" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/bias" + input: "save/RestoreV2:507" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_508" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/bias/adam_m" + input: "save/RestoreV2:508" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_509" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/bias/adam_v" + input: "save/RestoreV2:509" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_510" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/kernel" + input: "save/RestoreV2:510" 
+ attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_511" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_m" + input: "save/RestoreV2:511" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_512" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_v" + input: "save/RestoreV2:512" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_513" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/bias" + input: "save/RestoreV2:513" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_514" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/bias/adam_m" + input: "save/RestoreV2:514" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_515" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/bias/adam_v" + input: "save/RestoreV2:515" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_516" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/kernel" + input: "save/RestoreV2:516" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + 
dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_517" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_m" + input: "save/RestoreV2:517" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_518" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_v" + input: "save/RestoreV2:518" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_519" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/value/bias" + input: "save/RestoreV2:519" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_520" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/value/bias/adam_m" + input: "save/RestoreV2:520" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_521" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/value/bias/adam_v" + input: "save/RestoreV2:521" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_522" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/value/kernel" + input: "save/RestoreV2:522" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_523" + op: 
"Assign" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_m" + input: "save/RestoreV2:523" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_524" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_v" + input: "save/RestoreV2:524" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_525" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/bias" + input: "save/RestoreV2:525" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_526" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_m" + input: "save/RestoreV2:526" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_527" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_v" + input: "save/RestoreV2:527" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_528" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/kernel" + input: "save/RestoreV2:528" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_529" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + input: "save/RestoreV2:529" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: 
"loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_530" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + input: "save/RestoreV2:530" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_531" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/beta" + input: "save/RestoreV2:531" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_532" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:532" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_533" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:533" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_534" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/gamma" + input: "save/RestoreV2:534" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_535" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:535" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} 
+node { + name: "save/Assign_536" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:536" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_537" + op: "Assign" + input: "bert/encoder/layer_8/output/dense/bias" + input: "save/RestoreV2:537" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_538" + op: "Assign" + input: "bert/encoder/layer_8/output/dense/bias/adam_m" + input: "save/RestoreV2:538" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_539" + op: "Assign" + input: "bert/encoder/layer_8/output/dense/bias/adam_v" + input: "save/RestoreV2:539" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_540" + op: "Assign" + input: "bert/encoder/layer_8/output/dense/kernel" + input: "save/RestoreV2:540" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_541" + op: "Assign" + input: "bert/encoder/layer_8/output/dense/kernel/adam_m" + input: "save/RestoreV2:541" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_542" + op: "Assign" + input: "bert/encoder/layer_8/output/dense/kernel/adam_v" + input: "save/RestoreV2:542" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_v" + } + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_543" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta" + input: "save/RestoreV2:543" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_544" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:544" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_545" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:545" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_546" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma" + input: "save/RestoreV2:546" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_547" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:547" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_548" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:548" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} 
+node { + name: "save/Assign_549" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/dense/bias" + input: "save/RestoreV2:549" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_550" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/dense/bias/adam_m" + input: "save/RestoreV2:550" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_551" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/dense/bias/adam_v" + input: "save/RestoreV2:551" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_552" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/dense/kernel" + input: "save/RestoreV2:552" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_553" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + input: "save/RestoreV2:553" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_554" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + input: "save/RestoreV2:554" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_555" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/key/bias" + input: "save/RestoreV2:555" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + 
key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_556" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/key/bias/adam_m" + input: "save/RestoreV2:556" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_557" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/key/bias/adam_v" + input: "save/RestoreV2:557" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_558" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/key/kernel" + input: "save/RestoreV2:558" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_559" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_m" + input: "save/RestoreV2:559" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_560" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_v" + input: "save/RestoreV2:560" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_561" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/bias" + input: "save/RestoreV2:561" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: 
true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_562" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/bias/adam_m" + input: "save/RestoreV2:562" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_563" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/bias/adam_v" + input: "save/RestoreV2:563" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_564" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/kernel" + input: "save/RestoreV2:564" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_565" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_m" + input: "save/RestoreV2:565" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_566" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_v" + input: "save/RestoreV2:566" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_567" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/value/bias" + input: "save/RestoreV2:567" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_568" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/value/bias/adam_m" + input: "save/RestoreV2:568" + attr 
{ + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_569" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/value/bias/adam_v" + input: "save/RestoreV2:569" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_570" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/value/kernel" + input: "save/RestoreV2:570" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_571" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_m" + input: "save/RestoreV2:571" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_572" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_v" + input: "save/RestoreV2:572" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_573" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/bias" + input: "save/RestoreV2:573" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_574" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_m" + input: "save/RestoreV2:574" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + 
dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_575" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_v" + input: "save/RestoreV2:575" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_576" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/kernel" + input: "save/RestoreV2:576" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_577" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + input: "save/RestoreV2:577" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_578" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + input: "save/RestoreV2:578" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_579" + op: "Assign" + input: "bert/encoder/layer_9/output/LayerNorm/beta" + input: "save/RestoreV2:579" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_580" + op: "Assign" + input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:580" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_581" + op: "Assign" + input: 
"bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:581" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_582" + op: "Assign" + input: "bert/encoder/layer_9/output/LayerNorm/gamma" + input: "save/RestoreV2:582" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_583" + op: "Assign" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:583" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_584" + op: "Assign" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:584" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_585" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/bias" + input: "save/RestoreV2:585" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_586" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/bias/adam_m" + input: "save/RestoreV2:586" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_587" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/bias/adam_v" + input: "save/RestoreV2:587" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: 
"use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_588" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/kernel" + input: "save/RestoreV2:588" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_589" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/kernel/adam_m" + input: "save/RestoreV2:589" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_590" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/kernel/adam_v" + input: "save/RestoreV2:590" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_591" + op: "Assign" + input: "bert/pooler/dense/bias" + input: "save/RestoreV2:591" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_592" + op: "Assign" + input: "bert/pooler/dense/bias/adam_m" + input: "save/RestoreV2:592" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_593" + op: "Assign" + input: "bert/pooler/dense/bias/adam_v" + input: "save/RestoreV2:593" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_594" + op: "Assign" + input: "bert/pooler/dense/kernel" + input: "save/RestoreV2:594" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + 
list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_595" + op: "Assign" + input: "bert/pooler/dense/kernel/adam_m" + input: "save/RestoreV2:595" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_596" + op: "Assign" + input: "bert/pooler/dense/kernel/adam_v" + input: "save/RestoreV2:596" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Identity_1" + op: "Identity" + input: "save/RestoreV2:597" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } +} +node { + name: "save/AssignVariableOp" + op: "AssignVariableOp" + input: "global_step" + input: "save/Identity_1" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } +} +node { + name: "save/Assign_597" + op: "Assign" + input: "output_bias" + input: "save/RestoreV2:598" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_598" + op: "Assign" + input: "output_bias/adam_m" + input: "save/RestoreV2:599" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_599" + op: "Assign" + input: "output_bias/adam_v" + input: "save/RestoreV2:600" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_600" + op: "Assign" + input: "output_weights" + input: "save/RestoreV2:601" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } 
+ } +} +node { + name: "save/Assign_601" + op: "Assign" + input: "output_weights/adam_m" + input: "save/RestoreV2:602" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@output_weights/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_602" + op: "Assign" + input: "output_weights/adam_v" + input: "save/RestoreV2:603" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@output_weights/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/restore_shard" + op: "NoOp" + input: "^save/Assign" + input: "^save/AssignVariableOp" + input: "^save/Assign_1" + input: "^save/Assign_10" + input: "^save/Assign_100" + input: "^save/Assign_101" + input: "^save/Assign_102" + input: "^save/Assign_103" + input: "^save/Assign_104" + input: "^save/Assign_105" + input: "^save/Assign_106" + input: "^save/Assign_107" + input: "^save/Assign_108" + input: "^save/Assign_109" + input: "^save/Assign_11" + input: "^save/Assign_110" + input: "^save/Assign_111" + input: "^save/Assign_112" + input: "^save/Assign_113" + input: "^save/Assign_114" + input: "^save/Assign_115" + input: "^save/Assign_116" + input: "^save/Assign_117" + input: "^save/Assign_118" + input: "^save/Assign_119" + input: "^save/Assign_12" + input: "^save/Assign_120" + input: "^save/Assign_121" + input: "^save/Assign_122" + input: "^save/Assign_123" + input: "^save/Assign_124" + input: "^save/Assign_125" + input: "^save/Assign_126" + input: "^save/Assign_127" + input: "^save/Assign_128" + input: "^save/Assign_129" + input: "^save/Assign_13" + input: "^save/Assign_130" + input: "^save/Assign_131" + input: "^save/Assign_132" + input: "^save/Assign_133" + input: "^save/Assign_134" + input: "^save/Assign_135" + input: "^save/Assign_136" + input: "^save/Assign_137" + input: "^save/Assign_138" + input: "^save/Assign_139" + input: "^save/Assign_14" + input: "^save/Assign_140" + input: "^save/Assign_141" + input: "^save/Assign_142" + input: "^save/Assign_143" + input: "^save/Assign_144" + input: "^save/Assign_145" + input: "^save/Assign_146" + input: "^save/Assign_147" + input: "^save/Assign_148" + input: "^save/Assign_149" + input: "^save/Assign_15" + input: "^save/Assign_150" + input: "^save/Assign_151" + input: "^save/Assign_152" + input: "^save/Assign_153" + input: "^save/Assign_154" + input: "^save/Assign_155" + input: "^save/Assign_156" + input: "^save/Assign_157" + input: "^save/Assign_158" + input: "^save/Assign_159" + input: "^save/Assign_16" + input: "^save/Assign_160" + input: "^save/Assign_161" + input: "^save/Assign_162" + input: "^save/Assign_163" + input: "^save/Assign_164" + input: "^save/Assign_165" + input: "^save/Assign_166" + input: "^save/Assign_167" + input: "^save/Assign_168" + input: "^save/Assign_169" + input: "^save/Assign_17" + input: "^save/Assign_170" + input: "^save/Assign_171" + input: "^save/Assign_172" + input: "^save/Assign_173" + input: "^save/Assign_174" + input: "^save/Assign_175" + input: "^save/Assign_176" + input: "^save/Assign_177" + 
input: "^save/Assign_178" + input: "^save/Assign_179" + input: "^save/Assign_18" + input: "^save/Assign_180" + input: "^save/Assign_181" + input: "^save/Assign_182" + input: "^save/Assign_183" + input: "^save/Assign_184" + input: "^save/Assign_185" + input: "^save/Assign_186" + input: "^save/Assign_187" + input: "^save/Assign_188" + input: "^save/Assign_189" + input: "^save/Assign_19" + input: "^save/Assign_190" + input: "^save/Assign_191" + input: "^save/Assign_192" + input: "^save/Assign_193" + input: "^save/Assign_194" + input: "^save/Assign_195" + input: "^save/Assign_196" + input: "^save/Assign_197" + input: "^save/Assign_198" + input: "^save/Assign_199" + input: "^save/Assign_2" + input: "^save/Assign_20" + input: "^save/Assign_200" + input: "^save/Assign_201" + input: "^save/Assign_202" + input: "^save/Assign_203" + input: "^save/Assign_204" + input: "^save/Assign_205" + input: "^save/Assign_206" + input: "^save/Assign_207" + input: "^save/Assign_208" + input: "^save/Assign_209" + input: "^save/Assign_21" + input: "^save/Assign_210" + input: "^save/Assign_211" + input: "^save/Assign_212" + input: "^save/Assign_213" + input: "^save/Assign_214" + input: "^save/Assign_215" + input: "^save/Assign_216" + input: "^save/Assign_217" + input: "^save/Assign_218" + input: "^save/Assign_219" + input: "^save/Assign_22" + input: "^save/Assign_220" + input: "^save/Assign_221" + input: "^save/Assign_222" + input: "^save/Assign_223" + input: "^save/Assign_224" + input: "^save/Assign_225" + input: "^save/Assign_226" + input: "^save/Assign_227" + input: "^save/Assign_228" + input: "^save/Assign_229" + input: "^save/Assign_23" + input: "^save/Assign_230" + input: "^save/Assign_231" + input: "^save/Assign_232" + input: "^save/Assign_233" + input: "^save/Assign_234" + input: "^save/Assign_235" + input: "^save/Assign_236" + input: "^save/Assign_237" + input: "^save/Assign_238" + input: "^save/Assign_239" + input: "^save/Assign_24" + input: "^save/Assign_240" + input: "^save/Assign_241" + input: "^save/Assign_242" + input: "^save/Assign_243" + input: "^save/Assign_244" + input: "^save/Assign_245" + input: "^save/Assign_246" + input: "^save/Assign_247" + input: "^save/Assign_248" + input: "^save/Assign_249" + input: "^save/Assign_25" + input: "^save/Assign_250" + input: "^save/Assign_251" + input: "^save/Assign_252" + input: "^save/Assign_253" + input: "^save/Assign_254" + input: "^save/Assign_255" + input: "^save/Assign_256" + input: "^save/Assign_257" + input: "^save/Assign_258" + input: "^save/Assign_259" + input: "^save/Assign_26" + input: "^save/Assign_260" + input: "^save/Assign_261" + input: "^save/Assign_262" + input: "^save/Assign_263" + input: "^save/Assign_264" + input: "^save/Assign_265" + input: "^save/Assign_266" + input: "^save/Assign_267" + input: "^save/Assign_268" + input: "^save/Assign_269" + input: "^save/Assign_27" + input: "^save/Assign_270" + input: "^save/Assign_271" + input: "^save/Assign_272" + input: "^save/Assign_273" + input: "^save/Assign_274" + input: "^save/Assign_275" + input: "^save/Assign_276" + input: "^save/Assign_277" + input: "^save/Assign_278" + input: "^save/Assign_279" + input: "^save/Assign_28" + input: "^save/Assign_280" + input: "^save/Assign_281" + input: "^save/Assign_282" + input: "^save/Assign_283" + input: "^save/Assign_284" + input: "^save/Assign_285" + input: "^save/Assign_286" + input: "^save/Assign_287" + input: "^save/Assign_288" + input: "^save/Assign_289" + input: "^save/Assign_29" + input: "^save/Assign_290" + input: "^save/Assign_291" + input: 
"^save/Assign_292" + input: "^save/Assign_293" + input: "^save/Assign_294" + input: "^save/Assign_295" + input: "^save/Assign_296" + input: "^save/Assign_297" + input: "^save/Assign_298" + input: "^save/Assign_299" + input: "^save/Assign_3" + input: "^save/Assign_30" + input: "^save/Assign_300" + input: "^save/Assign_301" + input: "^save/Assign_302" + input: "^save/Assign_303" + input: "^save/Assign_304" + input: "^save/Assign_305" + input: "^save/Assign_306" + input: "^save/Assign_307" + input: "^save/Assign_308" + input: "^save/Assign_309" + input: "^save/Assign_31" + input: "^save/Assign_310" + input: "^save/Assign_311" + input: "^save/Assign_312" + input: "^save/Assign_313" + input: "^save/Assign_314" + input: "^save/Assign_315" + input: "^save/Assign_316" + input: "^save/Assign_317" + input: "^save/Assign_318" + input: "^save/Assign_319" + input: "^save/Assign_32" + input: "^save/Assign_320" + input: "^save/Assign_321" + input: "^save/Assign_322" + input: "^save/Assign_323" + input: "^save/Assign_324" + input: "^save/Assign_325" + input: "^save/Assign_326" + input: "^save/Assign_327" + input: "^save/Assign_328" + input: "^save/Assign_329" + input: "^save/Assign_33" + input: "^save/Assign_330" + input: "^save/Assign_331" + input: "^save/Assign_332" + input: "^save/Assign_333" + input: "^save/Assign_334" + input: "^save/Assign_335" + input: "^save/Assign_336" + input: "^save/Assign_337" + input: "^save/Assign_338" + input: "^save/Assign_339" + input: "^save/Assign_34" + input: "^save/Assign_340" + input: "^save/Assign_341" + input: "^save/Assign_342" + input: "^save/Assign_343" + input: "^save/Assign_344" + input: "^save/Assign_345" + input: "^save/Assign_346" + input: "^save/Assign_347" + input: "^save/Assign_348" + input: "^save/Assign_349" + input: "^save/Assign_35" + input: "^save/Assign_350" + input: "^save/Assign_351" + input: "^save/Assign_352" + input: "^save/Assign_353" + input: "^save/Assign_354" + input: "^save/Assign_355" + input: "^save/Assign_356" + input: "^save/Assign_357" + input: "^save/Assign_358" + input: "^save/Assign_359" + input: "^save/Assign_36" + input: "^save/Assign_360" + input: "^save/Assign_361" + input: "^save/Assign_362" + input: "^save/Assign_363" + input: "^save/Assign_364" + input: "^save/Assign_365" + input: "^save/Assign_366" + input: "^save/Assign_367" + input: "^save/Assign_368" + input: "^save/Assign_369" + input: "^save/Assign_37" + input: "^save/Assign_370" + input: "^save/Assign_371" + input: "^save/Assign_372" + input: "^save/Assign_373" + input: "^save/Assign_374" + input: "^save/Assign_375" + input: "^save/Assign_376" + input: "^save/Assign_377" + input: "^save/Assign_378" + input: "^save/Assign_379" + input: "^save/Assign_38" + input: "^save/Assign_380" + input: "^save/Assign_381" + input: "^save/Assign_382" + input: "^save/Assign_383" + input: "^save/Assign_384" + input: "^save/Assign_385" + input: "^save/Assign_386" + input: "^save/Assign_387" + input: "^save/Assign_388" + input: "^save/Assign_389" + input: "^save/Assign_39" + input: "^save/Assign_390" + input: "^save/Assign_391" + input: "^save/Assign_392" + input: "^save/Assign_393" + input: "^save/Assign_394" + input: "^save/Assign_395" + input: "^save/Assign_396" + input: "^save/Assign_397" + input: "^save/Assign_398" + input: "^save/Assign_399" + input: "^save/Assign_4" + input: "^save/Assign_40" + input: "^save/Assign_400" + input: "^save/Assign_401" + input: "^save/Assign_402" + input: "^save/Assign_403" + input: "^save/Assign_404" + input: "^save/Assign_405" + input: 
"^save/Assign_406" + input: "^save/Assign_407" + input: "^save/Assign_408" + input: "^save/Assign_409" + input: "^save/Assign_41" + input: "^save/Assign_410" + input: "^save/Assign_411" + input: "^save/Assign_412" + input: "^save/Assign_413" + input: "^save/Assign_414" + input: "^save/Assign_415" + input: "^save/Assign_416" + input: "^save/Assign_417" + input: "^save/Assign_418" + input: "^save/Assign_419" + input: "^save/Assign_42" + input: "^save/Assign_420" + input: "^save/Assign_421" + input: "^save/Assign_422" + input: "^save/Assign_423" + input: "^save/Assign_424" + input: "^save/Assign_425" + input: "^save/Assign_426" + input: "^save/Assign_427" + input: "^save/Assign_428" + input: "^save/Assign_429" + input: "^save/Assign_43" + input: "^save/Assign_430" + input: "^save/Assign_431" + input: "^save/Assign_432" + input: "^save/Assign_433" + input: "^save/Assign_434" + input: "^save/Assign_435" + input: "^save/Assign_436" + input: "^save/Assign_437" + input: "^save/Assign_438" + input: "^save/Assign_439" + input: "^save/Assign_44" + input: "^save/Assign_440" + input: "^save/Assign_441" + input: "^save/Assign_442" + input: "^save/Assign_443" + input: "^save/Assign_444" + input: "^save/Assign_445" + input: "^save/Assign_446" + input: "^save/Assign_447" + input: "^save/Assign_448" + input: "^save/Assign_449" + input: "^save/Assign_45" + input: "^save/Assign_450" + input: "^save/Assign_451" + input: "^save/Assign_452" + input: "^save/Assign_453" + input: "^save/Assign_454" + input: "^save/Assign_455" + input: "^save/Assign_456" + input: "^save/Assign_457" + input: "^save/Assign_458" + input: "^save/Assign_459" + input: "^save/Assign_46" + input: "^save/Assign_460" + input: "^save/Assign_461" + input: "^save/Assign_462" + input: "^save/Assign_463" + input: "^save/Assign_464" + input: "^save/Assign_465" + input: "^save/Assign_466" + input: "^save/Assign_467" + input: "^save/Assign_468" + input: "^save/Assign_469" + input: "^save/Assign_47" + input: "^save/Assign_470" + input: "^save/Assign_471" + input: "^save/Assign_472" + input: "^save/Assign_473" + input: "^save/Assign_474" + input: "^save/Assign_475" + input: "^save/Assign_476" + input: "^save/Assign_477" + input: "^save/Assign_478" + input: "^save/Assign_479" + input: "^save/Assign_48" + input: "^save/Assign_480" + input: "^save/Assign_481" + input: "^save/Assign_482" + input: "^save/Assign_483" + input: "^save/Assign_484" + input: "^save/Assign_485" + input: "^save/Assign_486" + input: "^save/Assign_487" + input: "^save/Assign_488" + input: "^save/Assign_489" + input: "^save/Assign_49" + input: "^save/Assign_490" + input: "^save/Assign_491" + input: "^save/Assign_492" + input: "^save/Assign_493" + input: "^save/Assign_494" + input: "^save/Assign_495" + input: "^save/Assign_496" + input: "^save/Assign_497" + input: "^save/Assign_498" + input: "^save/Assign_499" + input: "^save/Assign_5" + input: "^save/Assign_50" + input: "^save/Assign_500" + input: "^save/Assign_501" + input: "^save/Assign_502" + input: "^save/Assign_503" + input: "^save/Assign_504" + input: "^save/Assign_505" + input: "^save/Assign_506" + input: "^save/Assign_507" + input: "^save/Assign_508" + input: "^save/Assign_509" + input: "^save/Assign_51" + input: "^save/Assign_510" + input: "^save/Assign_511" + input: "^save/Assign_512" + input: "^save/Assign_513" + input: "^save/Assign_514" + input: "^save/Assign_515" + input: "^save/Assign_516" + input: "^save/Assign_517" + input: "^save/Assign_518" + input: "^save/Assign_519" + input: "^save/Assign_52" + input: 
"^save/Assign_520" + input: "^save/Assign_521" + input: "^save/Assign_522" + input: "^save/Assign_523" + input: "^save/Assign_524" + input: "^save/Assign_525" + input: "^save/Assign_526" + input: "^save/Assign_527" + input: "^save/Assign_528" + input: "^save/Assign_529" + input: "^save/Assign_53" + input: "^save/Assign_530" + input: "^save/Assign_531" + input: "^save/Assign_532" + input: "^save/Assign_533" + input: "^save/Assign_534" + input: "^save/Assign_535" + input: "^save/Assign_536" + input: "^save/Assign_537" + input: "^save/Assign_538" + input: "^save/Assign_539" + input: "^save/Assign_54" + input: "^save/Assign_540" + input: "^save/Assign_541" + input: "^save/Assign_542" + input: "^save/Assign_543" + input: "^save/Assign_544" + input: "^save/Assign_545" + input: "^save/Assign_546" + input: "^save/Assign_547" + input: "^save/Assign_548" + input: "^save/Assign_549" + input: "^save/Assign_55" + input: "^save/Assign_550" + input: "^save/Assign_551" + input: "^save/Assign_552" + input: "^save/Assign_553" + input: "^save/Assign_554" + input: "^save/Assign_555" + input: "^save/Assign_556" + input: "^save/Assign_557" + input: "^save/Assign_558" + input: "^save/Assign_559" + input: "^save/Assign_56" + input: "^save/Assign_560" + input: "^save/Assign_561" + input: "^save/Assign_562" + input: "^save/Assign_563" + input: "^save/Assign_564" + input: "^save/Assign_565" + input: "^save/Assign_566" + input: "^save/Assign_567" + input: "^save/Assign_568" + input: "^save/Assign_569" + input: "^save/Assign_57" + input: "^save/Assign_570" + input: "^save/Assign_571" + input: "^save/Assign_572" + input: "^save/Assign_573" + input: "^save/Assign_574" + input: "^save/Assign_575" + input: "^save/Assign_576" + input: "^save/Assign_577" + input: "^save/Assign_578" + input: "^save/Assign_579" + input: "^save/Assign_58" + input: "^save/Assign_580" + input: "^save/Assign_581" + input: "^save/Assign_582" + input: "^save/Assign_583" + input: "^save/Assign_584" + input: "^save/Assign_585" + input: "^save/Assign_586" + input: "^save/Assign_587" + input: "^save/Assign_588" + input: "^save/Assign_589" + input: "^save/Assign_59" + input: "^save/Assign_590" + input: "^save/Assign_591" + input: "^save/Assign_592" + input: "^save/Assign_593" + input: "^save/Assign_594" + input: "^save/Assign_595" + input: "^save/Assign_596" + input: "^save/Assign_597" + input: "^save/Assign_598" + input: "^save/Assign_599" + input: "^save/Assign_6" + input: "^save/Assign_60" + input: "^save/Assign_600" + input: "^save/Assign_601" + input: "^save/Assign_602" + input: "^save/Assign_61" + input: "^save/Assign_62" + input: "^save/Assign_63" + input: "^save/Assign_64" + input: "^save/Assign_65" + input: "^save/Assign_66" + input: "^save/Assign_67" + input: "^save/Assign_68" + input: "^save/Assign_69" + input: "^save/Assign_7" + input: "^save/Assign_70" + input: "^save/Assign_71" + input: "^save/Assign_72" + input: "^save/Assign_73" + input: "^save/Assign_74" + input: "^save/Assign_75" + input: "^save/Assign_76" + input: "^save/Assign_77" + input: "^save/Assign_78" + input: "^save/Assign_79" + input: "^save/Assign_8" + input: "^save/Assign_80" + input: "^save/Assign_81" + input: "^save/Assign_82" + input: "^save/Assign_83" + input: "^save/Assign_84" + input: "^save/Assign_85" + input: "^save/Assign_86" + input: "^save/Assign_87" + input: "^save/Assign_88" + input: "^save/Assign_89" + input: "^save/Assign_9" + input: "^save/Assign_90" + input: "^save/Assign_91" + input: "^save/Assign_92" + input: "^save/Assign_93" + input: "^save/Assign_94" + 
input: "^save/Assign_95" + input: "^save/Assign_96" + input: "^save/Assign_97" + input: "^save/Assign_98" + input: "^save/Assign_99" +} +node { + name: "save/restore_all" + op: "NoOp" + input: "^save/restore_shard" +} +library { + function { + signature { + name: "__inference_tf_data_experimental_map_and_batch__61" + input_arg { + name: "args_0" + type: DT_STRING + } + output_arg { + name: "identity" + type: DT_INT32 + } + output_arg { + name: "identity_1" + type: DT_INT32 + } + output_arg { + name: "identity_2" + type: DT_INT32 + } + output_arg { + name: "identity_3" + type: DT_INT32 + } + output_arg { + name: "identity_4" + type: DT_INT32 + } + } + node_def { + name: "ParseSingleExample/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + dim { + } + } + } + } + } + } + node_def { + name: "ParseSingleExample/Const_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + dim { + } + } + } + } + } + } + node_def { + name: "ParseSingleExample/Const_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + dim { + } + } + } + } + } + } + node_def { + name: "ParseSingleExample/Const_3" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + dim { + } + } + } + } + } + } + node_def { + name: "ParseSingleExample/Const_4" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + dim { + } + } + } + } + } + } + node_def { + name: "ParseSingleExample/ParseSingleExample" + op: "ParseSingleExample" + input: "args_0" + input: "ParseSingleExample/Const:output:0" + input: "ParseSingleExample/Const_1:output:0" + input: "ParseSingleExample/Const_2:output:0" + input: "ParseSingleExample/Const_3:output:0" + input: "ParseSingleExample/Const_4:output:0" + attr { + key: "Tdense" + value { + list { + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + shape { + dim { + size: 128 + } + } + shape { + } + shape { + } + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "dense_keys" + value { + list { + s: "input_ids" + s: "input_mask" + s: "is_real_example" + s: "label_ids" + s: "segment_ids" + } + } + } + attr { + key: "dense_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + shape { + dim { + size: 128 + } + } + shape { + } + shape { + } + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "num_sparse" + value { + i: 0 + } + } + attr { + key: "sparse_keys" + value { + list { + } + } + } + attr { + key: "sparse_types" + value { + list { + } + } + } + } + node_def { + name: "ToInt32" + op: "Cast" + 
input: "ParseSingleExample/ParseSingleExample:dense_values:0" + attr { + key: "DstT" + value { + type: DT_INT32 + } + } + attr { + key: "SrcT" + value { + type: DT_INT64 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + } + node_def { + name: "ToInt32_1" + op: "Cast" + input: "ParseSingleExample/ParseSingleExample:dense_values:1" + attr { + key: "DstT" + value { + type: DT_INT32 + } + } + attr { + key: "SrcT" + value { + type: DT_INT64 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + } + node_def { + name: "ToInt32_2" + op: "Cast" + input: "ParseSingleExample/ParseSingleExample:dense_values:2" + attr { + key: "DstT" + value { + type: DT_INT32 + } + } + attr { + key: "SrcT" + value { + type: DT_INT64 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + node_def { + name: "ToInt32_3" + op: "Cast" + input: "ParseSingleExample/ParseSingleExample:dense_values:3" + attr { + key: "DstT" + value { + type: DT_INT32 + } + } + attr { + key: "SrcT" + value { + type: DT_INT64 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + node_def { + name: "ToInt32_4" + op: "Cast" + input: "ParseSingleExample/ParseSingleExample:dense_values:4" + attr { + key: "DstT" + value { + type: DT_INT32 + } + } + attr { + key: "SrcT" + value { + type: DT_INT64 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + } + node_def { + name: "Identity" + op: "Identity" + input: "ToInt32:y:0" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + } + node_def { + name: "Identity_1" + op: "Identity" + input: "ToInt32_1:y:0" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + } + node_def { + name: "Identity_2" + op: "Identity" + input: "ToInt32_2:y:0" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + node_def { + name: "Identity_3" + op: "Identity" + input: "ToInt32_3:y:0" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + node_def { + name: "Identity_4" + op: "Identity" + input: "ToInt32_4:y:0" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + ret { + key: "identity_1" + value: "Identity_1:output:0" + } + ret { + key: "identity_2" + value: "Identity_2:output:0" + } + ret { + key: "identity_3" + value: "Identity_3:output:0" + } + ret { + key: "identity_4" + value: "Identity_4:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + } + } + } + } + arg_attr { + value { + attr { + key: "_user_specified_name" + value { + s: "args_0" + } + } + } + } + } + function { + signature { + name: "__inference_Dataset_flat_map_read_one_file_31" 
+ input_arg { + name: "args_0" + type: DT_STRING + } + output_arg { + name: "identity" + type: DT_VARIANT + } + is_stateful: true + control_output: "TFRecordDataset" + } + node_def { + name: "compression_type" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "" + } + } + } + } + node_def { + name: "buffer_size" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 262144 + } + } + } + } + node_def { + name: "TFRecordDataset" + op: "TFRecordDataset" + input: "args_0" + input: "compression_type:output:0" + input: "buffer_size:output:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + node_def { + name: "Identity" + op: "Identity" + input: "TFRecordDataset:handle:0" + input: "^TFRecordDataset" + attr { + key: "T" + value { + type: DT_VARIANT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + } + } + } + } + control_ret { + key: "TFRecordDataset" + value: "TFRecordDataset" + } + arg_attr { + value { + attr { + key: "_user_specified_name" + value { + s: "args_0" + } + } + } + } + } +} +versions { + producer: 38 + min_consumer: 12 +} diff --git a/tmp/epout/model.ckpt-14062.index b/tmp/epout/model.ckpt-14062.index new file mode 100644 index 0000000..03cf57b Binary files /dev/null and b/tmp/epout/model.ckpt-14062.index differ diff --git a/tmp/epout/model.ckpt-14062.meta b/tmp/epout/model.ckpt-14062.meta new file mode 100644 index 0000000..6bd51ea Binary files /dev/null and b/tmp/epout/model.ckpt-14062.meta differ diff --git a/tmp/epout/train.tf_record b/tmp/epout/train.tf_record new file mode 100644 index 0000000..67a55f3 Binary files /dev/null and b/tmp/epout/train.tf_record differ diff --git a/tmp/eppredict/predict.tf_record b/tmp/eppredict/predict.tf_record new file mode 100644 index 0000000..2a6f79e Binary files /dev/null and b/tmp/eppredict/predict.tf_record differ diff --git a/tmp/eppredict/test_results.tsv b/tmp/eppredict/test_results.tsv new file mode 100644 index 0000000..35a5f99 --- /dev/null +++ b/tmp/eppredict/test_results.tsv @@ -0,0 +1,134 @@ +4.643959e-06 0.99999154 3.8252842e-06 +4.8007923e-06 0.99999166 3.5236512e-06 +4.6748496e-06 0.9999913 4.0859113e-06 +4.4524345e-06 0.99999154 4.0169652e-06 +4.0334426e-06 0.999992 3.9886727e-06 +4.3244304e-06 0.9999919 3.781365e-06 +4.126574e-06 0.999992 3.8155436e-06 +4.2378488e-06 0.9999919 3.8473977e-06 +0.00011727525 0.99848527 0.0013974697 +4.361115e-06 0.99999154 4.017187e-06 +5.1768693e-06 0.9999908 4.06028e-06 +4.288634e-06 0.999992 3.709147e-06 +4.9716205e-06 0.99999106 3.9358397e-06 +4.182195e-06 0.99999213 3.6933725e-06 +4.549165e-06 0.9999913 4.1921107e-06 +6.6755088e-06 0.99998903 4.339319e-06 +4.595618e-06 0.9999914 3.9184333e-06 +4.607402e-06 0.99999166 3.752449e-06 +4.598755e-06 0.9999919 3.4854709e-06 +4.8619454e-06 0.9999912 3.882037e-06 +4.1419257e-06 0.9999918 4.000119e-06 +4.784566e-06 0.99999154 3.654619e-06 +4.388862e-06 0.9999919 3.6818228e-06 +5.644322e-06 0.9999908 3.5793255e-06 +3.823311e-06 0.99999154 4.630431e-06 +4.244102e-06 
0.999992 3.6986664e-06 +4.2734914e-06 0.999992 3.6995482e-06 +4.3241253e-06 0.9999919 3.7916818e-06 +4.4547583e-06 0.9999907 4.839659e-06 +4.5243414e-06 0.9999918 3.6832657e-06 +8.419241e-06 0.99998474 6.8440236e-06 +7.966646e-06 0.9999751 1.6936197e-05 +5.1216794e-06 0.9999901 4.733681e-06 +5.034731e-06 0.9999912 3.794812e-06 +0.0021447523 0.9975885 0.00026675928 +5.646126e-06 0.9999908 3.6186827e-06 +1.690044e-05 0.99997103 1.2073235e-05 +4.9650066e-06 0.9999912 3.8529765e-06 +4.904027e-06 0.99999094 4.1881613e-06 +0.9271971 0.008783599 0.064019315 +4.868973e-06 0.9999918 3.3974088e-06 +4.9225746e-06 0.9999913 3.7550608e-06 +3.858802e-06 0.9999924 3.8034468e-06 +3.494936e-05 0.9999577 7.3692418e-06 +4.923359e-06 0.9999913 3.849029e-06 +5.178022e-06 0.9999914 3.4967088e-06 +4.121945e-06 0.99999213 3.7508728e-06 +4.451608e-06 0.9999919 3.651948e-06 +0.00792893 0.98557657 0.0064944625 +3.5600256e-06 0.9999896 6.819469e-06 +4.801607e-06 0.99999154 3.753082e-06 +0.23659243 0.7619511 0.001456472 +4.4562576e-06 0.99999154 4.027041e-06 +0.0015988095 0.99809843 0.00030284424 +9.155851e-06 0.99998546 5.311816e-06 +6.3670245e-06 0.9999906 3.0769254e-06 +4.1778785e-06 0.9999914 4.376392e-06 +4.648281e-06 0.999992 3.3429906e-06 +5.194813e-06 0.99999106 3.7433254e-06 +9.076348e-06 0.9999436 4.7352063e-05 +4.3432983e-06 0.999992 3.7271493e-06 +4.302407e-06 0.9999902 5.4830507e-06 +5.4339334e-06 0.9999907 3.8040862e-06 +4.391311e-06 0.9999918 3.8109024e-06 +6.945087e-06 0.9999863 6.775152e-06 +5.1417023e-06 0.9999895 5.3861663e-06 +0.0011567149 0.9985071 0.00033610596 +5.3787658e-06 0.9999907 3.8785456e-06 +1.8892406e-05 0.9999585 2.2630838e-05 +0.0015190784 0.997843 0.00063789665 +6.7695796e-06 0.9999875 5.740492e-06 +5.1006527e-06 0.99999034 4.5368133e-06 +5.47516e-06 0.99998915 5.3493272e-06 +4.8562415e-06 0.99999034 4.7732487e-06 +0.060555745 0.0001441841 0.93930006 +0.052763145 0.94372463 0.0035121434 +4.3671207e-06 0.99999166 3.9508695e-06 +4.778654e-06 0.9999901 5.128561e-06 +4.7153376e-06 0.9999908 4.362817e-06 +4.2666793e-06 0.9999914 4.266248e-06 +3.838838e-06 0.99999225 3.990184e-06 +4.461001e-06 0.9999912 4.44564e-06 +4.0283635e-06 0.9999918 4.1213025e-06 +1.4158776e-05 0.99996805 1.7746825e-05 +8.674982e-05 0.9998728 4.0382252e-05 +4.290552e-06 0.9999919 3.838057e-06 +5.187617e-06 0.9999914 3.374771e-06 +6.3959133e-06 0.9999894 4.2049714e-06 +6.8617037e-06 0.9999896 3.4103866e-06 +4.4409358e-06 0.999992 3.5424264e-06 +5.4345987e-06 0.9999901 4.447051e-06 +4.135196e-06 0.99999166 4.2157744e-06 +4.7487447e-06 0.99999154 3.7494221e-06 +8.4601015e-06 0.9999864 5.146833e-06 +0.0010207603 0.9987452 0.00023391622 +4.5771494e-06 0.9999919 3.5248079e-06 +4.798046e-06 0.99999166 3.5586033e-06 +5.8361684e-06 0.99998987 4.2727647e-06 +5.2285122e-06 0.9999913 3.479859e-06 +4.372247e-06 0.9999918 3.8096887e-06 +4.5528377e-06 0.99999154 3.955717e-06 +4.7401645e-06 0.9999913 3.9730817e-06 +4.4522612e-06 0.9999918 3.8139272e-06 +4.5628153e-06 0.99999094 4.564952e-06 +5.596948e-06 0.99998975 4.607477e-06 +4.4438884e-06 0.9999919 3.6515894e-06 +4.682183e-06 0.9999919 3.397442e-06 +4.7578187e-06 0.9999913 3.9699153e-06 +6.961098e-06 0.9999745 1.8637946e-05 +4.2590627e-06 0.9999919 3.834863e-06 +5.1346065e-06 0.9999894 5.45891e-06 +4.871587e-06 0.9999896 5.451893e-06 +4.0097016e-06 0.9999918 4.1663734e-06 +5.319837e-06 0.99999046 4.156601e-06 +4.407603e-06 0.9999919 3.69892e-06 +4.4321364e-06 0.9999914 4.1590397e-06 +4.812539e-06 0.9999912 3.9999254e-06 +5.329538e-06 0.9999888 5.881711e-06 +4.2385377e-06 
0.99999225 3.5127584e-06 +4.7709664e-06 0.99999094 4.2666857e-06 +9.780118e-06 0.99998 1.0236932e-05 +8.97482e-06 0.99997663 1.44237465e-05 +4.4326803e-06 0.9999919 3.6516205e-06 +4.6600417e-06 0.99999166 3.7069035e-06 +0.00041292913 0.99843234 0.001154748 +0.008509361 0.9900877 0.0014030206 +7.7520845e-06 0.9999875 4.7950984e-06 +4.8316547e-06 0.99999094 4.2039433e-06 +4.4381522e-06 0.99999166 3.9086926e-06 +5.5704777e-06 0.9999908 3.5597884e-06 +4.117504e-06 0.9999918 4.029943e-06 +5.205461e-06 0.9999893 5.5215824e-06 +4.6852315e-06 0.9999914 3.9398033e-06 +4.80286e-06 0.9999913 3.89835e-06 diff --git a/tokenization.py b/tokenization.py new file mode 100644 index 0000000..0ee1359 --- /dev/null +++ b/tokenization.py @@ -0,0 +1,399 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tokenization classes.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import re +import unicodedata +import six +import tensorflow as tf + + +def validate_case_matches_checkpoint(do_lower_case, init_checkpoint): + """Checks whether the casing config is consistent with the checkpoint name.""" + + # The casing has to be passed in by the user and there is no explicit check + # as to whether it matches the checkpoint. The casing information probably + # should have been stored in the bert_config.json file, but it's not, so + # we have to heuristically detect it to validate. + + if not init_checkpoint: + return + + m = re.match("^.*?([A-Za-z0-9_-]+)/bert_model.ckpt", init_checkpoint) + if m is None: + return + + model_name = m.group(1) + + lower_models = [ + "uncased_L-24_H-1024_A-16", "uncased_L-12_H-768_A-12", + "multilingual_L-12_H-768_A-12", "chinese_L-12_H-768_A-12" + ] + + cased_models = [ + "cased_L-12_H-768_A-12", "cased_L-24_H-1024_A-16", + "multi_cased_L-12_H-768_A-12" + ] + + is_bad_config = False + if model_name in lower_models and not do_lower_case: + is_bad_config = True + actual_flag = "False" + case_name = "lowercased" + opposite_flag = "True" + + if model_name in cased_models and do_lower_case: + is_bad_config = True + actual_flag = "True" + case_name = "cased" + opposite_flag = "False" + + if is_bad_config: + raise ValueError( + "You passed in `--do_lower_case=%s` with `--init_checkpoint=%s`. " + "However, `%s` seems to be a %s model, so you " + "should pass in `--do_lower_case=%s` so that the fine-tuning matches " + "how the model was pre-training. If this error is wrong, please " + "just comment out this check." 
% (actual_flag, init_checkpoint, + model_name, case_name, opposite_flag)) + + +def convert_to_unicode(text): + """Converts `text` to Unicode (if it's not already), assuming utf-8 input.""" + if six.PY3: + if isinstance(text, str): + return text + elif isinstance(text, bytes): + return text.decode("utf-8", "ignore") + else: + raise ValueError("Unsupported string type: %s" % (type(text))) + elif six.PY2: + if isinstance(text, str): + return text.decode("utf-8", "ignore") + elif isinstance(text, unicode): + return text + else: + raise ValueError("Unsupported string type: %s" % (type(text))) + else: + raise ValueError("Not running on Python2 or Python 3?") + + +def printable_text(text): + """Returns text encoded in a way suitable for print or `tf.logging`.""" + + # These functions want `str` for both Python2 and Python3, but in one case + # it's a Unicode string and in the other it's a byte string. + if six.PY3: + if isinstance(text, str): + return text + elif isinstance(text, bytes): + return text.decode("utf-8", "ignore") + else: + raise ValueError("Unsupported string type: %s" % (type(text))) + elif six.PY2: + if isinstance(text, str): + return text + elif isinstance(text, unicode): + return text.encode("utf-8") + else: + raise ValueError("Unsupported string type: %s" % (type(text))) + else: + raise ValueError("Not running on Python2 or Python 3?") + + +def load_vocab(vocab_file): + """Loads a vocabulary file into a dictionary.""" + vocab = collections.OrderedDict() + index = 0 + with tf.gfile.GFile(vocab_file, "r") as reader: + while True: + token = convert_to_unicode(reader.readline()) + if not token: + break + token = token.strip() + vocab[token] = index + index += 1 + return vocab + + +def convert_by_vocab(vocab, items): + """Converts a sequence of [tokens|ids] using the vocab.""" + output = [] + for item in items: + output.append(vocab[item]) + return output + + +def convert_tokens_to_ids(vocab, tokens): + return convert_by_vocab(vocab, tokens) + + +def convert_ids_to_tokens(inv_vocab, ids): + return convert_by_vocab(inv_vocab, ids) + + +def whitespace_tokenize(text): + """Runs basic whitespace cleaning and splitting on a piece of text.""" + text = text.strip() + if not text: + return [] + tokens = text.split() + return tokens + + +class FullTokenizer(object): + """Runs end-to-end tokenziation.""" + + def __init__(self, vocab_file, do_lower_case=True): + self.vocab = load_vocab(vocab_file) + self.inv_vocab = {v: k for k, v in self.vocab.items()} + self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case) + self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab) + + def tokenize(self, text): + split_tokens = [] + for token in self.basic_tokenizer.tokenize(text): + for sub_token in self.wordpiece_tokenizer.tokenize(token): + split_tokens.append(sub_token) + + return split_tokens + + def convert_tokens_to_ids(self, tokens): + return convert_by_vocab(self.vocab, tokens) + + def convert_ids_to_tokens(self, ids): + return convert_by_vocab(self.inv_vocab, ids) + + +class BasicTokenizer(object): + """Runs basic tokenization (punctuation splitting, lower casing, etc.).""" + + def __init__(self, do_lower_case=True): + """Constructs a BasicTokenizer. + + Args: + do_lower_case: Whether to lower case the input. + """ + self.do_lower_case = do_lower_case + + def tokenize(self, text): + """Tokenizes a piece of text.""" + text = convert_to_unicode(text) + text = self._clean_text(text) + + # This was added on November 1st, 2018 for the multilingual and Chinese + # models. 
This is also applied to the English models now, but it doesn't + # matter since the English models were not trained on any Chinese data + # and generally don't have any Chinese data in them (there are Chinese + # characters in the vocabulary because Wikipedia does have some Chinese + # words in the English Wikipedia.). + text = self._tokenize_chinese_chars(text) + + orig_tokens = whitespace_tokenize(text) + split_tokens = [] + for token in orig_tokens: + if self.do_lower_case: + token = token.lower() + token = self._run_strip_accents(token) + split_tokens.extend(self._run_split_on_punc(token)) + + output_tokens = whitespace_tokenize(" ".join(split_tokens)) + return output_tokens + + def _run_strip_accents(self, text): + """Strips accents from a piece of text.""" + text = unicodedata.normalize("NFD", text) + output = [] + for char in text: + cat = unicodedata.category(char) + if cat == "Mn": + continue + output.append(char) + return "".join(output) + + def _run_split_on_punc(self, text): + """Splits punctuation on a piece of text.""" + chars = list(text) + i = 0 + start_new_word = True + output = [] + while i < len(chars): + char = chars[i] + if _is_punctuation(char): + output.append([char]) + start_new_word = True + else: + if start_new_word: + output.append([]) + start_new_word = False + output[-1].append(char) + i += 1 + + return ["".join(x) for x in output] + + def _tokenize_chinese_chars(self, text): + """Adds whitespace around any CJK character.""" + output = [] + for char in text: + cp = ord(char) + if self._is_chinese_char(cp): + output.append(" ") + output.append(char) + output.append(" ") + else: + output.append(char) + return "".join(output) + + def _is_chinese_char(self, cp): + """Checks whether CP is the codepoint of a CJK character.""" + # This defines a "chinese character" as anything in the CJK Unicode block: + # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) + # + # Note that the CJK Unicode block is NOT all Japanese and Korean characters, + # despite its name. The modern Korean Hangul alphabet is a different block, + # as is Japanese Hiragana and Katakana. Those alphabets are used to write + # space-separated words, so they are not treated specially and handled + # like the all of the other languages. + if ((cp >= 0x4E00 and cp <= 0x9FFF) or # + (cp >= 0x3400 and cp <= 0x4DBF) or # + (cp >= 0x20000 and cp <= 0x2A6DF) or # + (cp >= 0x2A700 and cp <= 0x2B73F) or # + (cp >= 0x2B740 and cp <= 0x2B81F) or # + (cp >= 0x2B820 and cp <= 0x2CEAF) or + (cp >= 0xF900 and cp <= 0xFAFF) or # + (cp >= 0x2F800 and cp <= 0x2FA1F)): # + return True + + return False + + def _clean_text(self, text): + """Performs invalid character removal and whitespace cleanup on text.""" + output = [] + for char in text: + cp = ord(char) + if cp == 0 or cp == 0xfffd or _is_control(char): + continue + if _is_whitespace(char): + output.append(" ") + else: + output.append(char) + return "".join(output) + + +class WordpieceTokenizer(object): + """Runs WordPiece tokenziation.""" + + def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=200): + self.vocab = vocab + self.unk_token = unk_token + self.max_input_chars_per_word = max_input_chars_per_word + + def tokenize(self, text): + """Tokenizes a piece of text into its word pieces. + + This uses a greedy longest-match-first algorithm to perform tokenization + using the given vocabulary. + + For example: + input = "unaffable" + output = ["un", "##aff", "##able"] + + Args: + text: A single token or whitespace separated tokens. 
This should have + already been passed through `BasicTokenizer. + + Returns: + A list of wordpiece tokens. + """ + + text = convert_to_unicode(text) + + output_tokens = [] + for token in whitespace_tokenize(text): + chars = list(token) + if len(chars) > self.max_input_chars_per_word: + output_tokens.append(self.unk_token) + continue + + is_bad = False + start = 0 + sub_tokens = [] + while start < len(chars): + end = len(chars) + cur_substr = None + while start < end: + substr = "".join(chars[start:end]) + if start > 0: + substr = "##" + substr + if substr in self.vocab: + cur_substr = substr + break + end -= 1 + if cur_substr is None: + is_bad = True + break + sub_tokens.append(cur_substr) + start = end + + if is_bad: + output_tokens.append(self.unk_token) + else: + output_tokens.extend(sub_tokens) + return output_tokens + + +def _is_whitespace(char): + """Checks whether `chars` is a whitespace character.""" + # \t, \n, and \r are technically contorl characters but we treat them + # as whitespace since they are generally considered as such. + if char == " " or char == "\t" or char == "\n" or char == "\r": + return True + cat = unicodedata.category(char) + if cat == "Zs": + return True + return False + + +def _is_control(char): + """Checks whether `chars` is a control character.""" + # These are technically control characters but we count them as whitespace + # characters. + if char == "\t" or char == "\n" or char == "\r": + return False + cat = unicodedata.category(char) + if cat in ("Cc", "Cf"): + return True + return False + + +def _is_punctuation(char): + """Checks whether `chars` is a punctuation character.""" + cp = ord(char) + # We treat all non-letter/number ASCII as punctuation. + # Characters such as "^", "$", and "`" are not in the Unicode + # Punctuation class but we treat them as punctuation anyways, for + # consistency. + if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or + (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)): + return True + cat = unicodedata.category(char) + if cat.startswith("P"): + return True + return False diff --git a/tokenization_test.py b/tokenization_test.py new file mode 100644 index 0000000..0afaedd --- /dev/null +++ b/tokenization_test.py @@ -0,0 +1,137 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import tempfile +import tokenization +import six +import tensorflow as tf + + +class TokenizationTest(tf.test.TestCase): + + def test_full_tokenizer(self): + vocab_tokens = [ + "[UNK]", "[CLS]", "[SEP]", "want", "##want", "##ed", "wa", "un", "runn", + "##ing", "," + ] + with tempfile.NamedTemporaryFile(delete=False) as vocab_writer: + if six.PY2: + vocab_writer.write("".join([x + "\n" for x in vocab_tokens])) + else: + vocab_writer.write("".join( + [x + "\n" for x in vocab_tokens]).encode("utf-8")) + + vocab_file = vocab_writer.name + + tokenizer = tokenization.FullTokenizer(vocab_file) + os.unlink(vocab_file) + + tokens = tokenizer.tokenize(u"UNwant\u00E9d,running") + self.assertAllEqual(tokens, ["un", "##want", "##ed", ",", "runn", "##ing"]) + + self.assertAllEqual( + tokenizer.convert_tokens_to_ids(tokens), [7, 4, 5, 10, 8, 9]) + + def test_chinese(self): + tokenizer = tokenization.BasicTokenizer() + + self.assertAllEqual( + tokenizer.tokenize(u"ah\u535A\u63A8zz"), + [u"ah", u"\u535A", u"\u63A8", u"zz"]) + + def test_basic_tokenizer_lower(self): + tokenizer = tokenization.BasicTokenizer(do_lower_case=True) + + self.assertAllEqual( + tokenizer.tokenize(u" \tHeLLo!how \n Are yoU? "), + ["hello", "!", "how", "are", "you", "?"]) + self.assertAllEqual(tokenizer.tokenize(u"H\u00E9llo"), ["hello"]) + + def test_basic_tokenizer_no_lower(self): + tokenizer = tokenization.BasicTokenizer(do_lower_case=False) + + self.assertAllEqual( + tokenizer.tokenize(u" \tHeLLo!how \n Are yoU? "), + ["HeLLo", "!", "how", "Are", "yoU", "?"]) + + def test_wordpiece_tokenizer(self): + vocab_tokens = [ + "[UNK]", "[CLS]", "[SEP]", "want", "##want", "##ed", "wa", "un", "runn", + "##ing" + ] + + vocab = {} + for (i, token) in enumerate(vocab_tokens): + vocab[token] = i + tokenizer = tokenization.WordpieceTokenizer(vocab=vocab) + + self.assertAllEqual(tokenizer.tokenize(""), []) + + self.assertAllEqual( + tokenizer.tokenize("unwanted running"), + ["un", "##want", "##ed", "runn", "##ing"]) + + self.assertAllEqual( + tokenizer.tokenize("unwantedX running"), ["[UNK]", "runn", "##ing"]) + + def test_convert_tokens_to_ids(self): + vocab_tokens = [ + "[UNK]", "[CLS]", "[SEP]", "want", "##want", "##ed", "wa", "un", "runn", + "##ing" + ] + + vocab = {} + for (i, token) in enumerate(vocab_tokens): + vocab[token] = i + + self.assertAllEqual( + tokenization.convert_tokens_to_ids( + vocab, ["un", "##want", "##ed", "runn", "##ing"]), [7, 4, 5, 8, 9]) + + def test_is_whitespace(self): + self.assertTrue(tokenization._is_whitespace(u" ")) + self.assertTrue(tokenization._is_whitespace(u"\t")) + self.assertTrue(tokenization._is_whitespace(u"\r")) + self.assertTrue(tokenization._is_whitespace(u"\n")) + self.assertTrue(tokenization._is_whitespace(u"\u00A0")) + + self.assertFalse(tokenization._is_whitespace(u"A")) + self.assertFalse(tokenization._is_whitespace(u"-")) + + def test_is_control(self): + self.assertTrue(tokenization._is_control(u"\u0005")) + + self.assertFalse(tokenization._is_control(u"A")) + self.assertFalse(tokenization._is_control(u" ")) + self.assertFalse(tokenization._is_control(u"\t")) + self.assertFalse(tokenization._is_control(u"\r")) + self.assertFalse(tokenization._is_control(u"\U0001F4A9")) + + def test_is_punctuation(self): + self.assertTrue(tokenization._is_punctuation(u"-")) + self.assertTrue(tokenization._is_punctuation(u"$")) + self.assertTrue(tokenization._is_punctuation(u"`")) + 
self.assertTrue(tokenization._is_punctuation(u".")) + + self.assertFalse(tokenization._is_punctuation(u"A")) + self.assertFalse(tokenization._is_punctuation(u" ")) + + +if __name__ == "__main__": + tf.test.main()